From 951b94ad57b1f5a783ad767e610dd1712a36fdc8 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Tue, 8 Oct 2024 08:53:09 -0700 Subject: [PATCH 01/13] integrate legend in Visualization if `export_legend` is enabled, present the legend in the resulting Visualization. Note that there was a minor bug in the original version of the Visualizer - since the conditional was being evaluated after `export_legend` was cast to a string, it always evaluated to `True`. --- gut_to_soil_manuscript_figures/_methods.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index 96d131b..eef69ed 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -44,12 +44,11 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, swap_axes = str(swap_axes) himalaya = str(himalaya) pit_toilet = str(pit_toilet) - export_legend = str(export_legend) + export_legend_str = str(export_legend) plot_fp = os.path.join(output_dir, 'pcoa_plot.png') - if export_legend: - legend_fp = os.path.join(output_dir, 'legend.png') + legend_fp = os.path.join(output_dir, 'legend.png') command = [ 'python', script_path, @@ -64,7 +63,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, swap_axes, himalaya, pit_toilet, - export_legend, + export_legend_str, highlighted_buckets, legend_fp ] @@ -80,6 +79,13 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,

2D PCoA Plot

PCoA Plot + ''') + if export_legend: + f.write(''' +

+ PCoA Plot legend + ''') + f.write(''' ''') From b9da739701dc4af9d49d3f889d32744bc946cb41 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Tue, 8 Oct 2024 09:09:12 -0700 Subject: [PATCH 02/13] updates to reflect refactored metadata --- README.md | 4 +- .../scripts/plot_pcoa_2d.py | 61 ++++++++++--------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 0029d6d..fc553c7 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ Your first step will be filtering the distance matrix you'd like to use for the qiime diversity filter-distance-matrix \ --i-distance-matrix unweighted-unifrac-distance-matrix.qza \ --m-metadata-file final-analysis-metadata.tsv \ ---p-where "[SampleType2] IN ('EMP-Soils', 'Food-Compost', 'Self Sample', 'Compost Post-Roll', 'Bulking Material')" \ +--p-where "[SampleType] IN ('Soil', 'Food Compost', 'Landscape Compost', 'Human Excrement', 'Human Excrement Compost', 'Bulking Material')" \ --o-filtered-distance-matrix filtered-unweighted-unifrac-distance-matrix.qza ``` @@ -95,7 +95,7 @@ Now we're ready to generate a pcoa plot! qiime gut-to-soil-manuscript-figures pcoa-2d \ --i-ordination filtered-unweighted-unifrac-2d-pcoa.qza \ --m-metadata-file final-analysis-metadata.tsv \ ---p-measure 'Unweighted Unifrac' \ +--p-measure 'Unweighted UniFrac' \ --p-average \ --p-export-legend \ --p-highlighted-buckets '3, 4' \ diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index df36685..e32d123 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -43,11 +43,12 @@ def _bucket_util(highlighted_buckets, md, ord_2d): # connecting time series data in order md_bucket_sorted = \ md[(md['Bucket'] == bucket) & - (md['SampleType2'] == 'Compost Post-Roll')].sort_values('Week') + (md['SampleType'] == 'Human Excrement Compost')].\ + sort_values('Composting Time Point') # week 1-52 IDs for selected bucket - bucket_ids_sorted = \ - md_bucket_sorted[md_bucket_sorted['Week'] > 0.0].index.values + bucket_ids_sorted = md_bucket_sorted[ + md_bucket_sorted['Composting Time Point'] > 0.0].index.values # making sure bucket IDs used are only ones that are present in both # the md and ordination results @@ -62,8 +63,7 @@ def _bucket_util(highlighted_buckets, md, ord_2d): # HE bucket_ids_HE_week0 = \ md[(md['Bucket'] == bucket) & - (md['SampleType2'] == 'Self Sample') & - (md['Week'] == 0.0)].index.values + (md['SampleType'] == 'Human Excrement')].index.values ids_HE_week0 = [] for i in bucket_ids_HE_week0: @@ -73,8 +73,7 @@ def _bucket_util(highlighted_buckets, md, ord_2d): # bulking bucket_ids_bulk_week0 = \ md[(md['Bucket'] == bucket) & - (md['SampleType2'] == 'Bulking Material') & - (md['Week'] == 0.0)].index.values + (md['SampleType'] == 'Bulking Material')].index.values ids_bulk_week0 = [] for i in bucket_ids_bulk_week0: @@ -82,8 +81,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d): ids_bulk_week0.append(i) # week 1 i.e. end points for dotted line connecting HE & BM -> HEC - bucket_ids_HEC_week1 = \ - md_bucket_sorted[md_bucket_sorted['Week'] == 1.0].index.values + bucket_ids_HEC_week1 = md_bucket_sorted[ + md_bucket_sorted['Composting Time Point'] == 1.0].index.values ids_HEC_week1 = [] for i in bucket_ids_HEC_week1: @@ -141,8 +140,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, ord_2d[1] = ord_2d[1].multiply(-1) # allowed sample types to be pulled from the md - sample_types = ['EMP-Soils', 'Food-Compost', 'Self Sample', - 'Compost Post-Roll', 'Bulking Material'] + sample_types = ['Soil', 'Food Compost', 'Landscape Compost', + 'Human Excrement', 'Human Excrement Compost', + 'Bulking Material'] # if using himalaya and/or pit toilet data if himalaya == 'True': @@ -151,21 +151,23 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, sample_types.append('Pit Toilet') # sorting the filtered md (by allowed sample types) by week - md = metadata[metadata['SampleType2'] - .isin(sample_types)].sort_values('Week') + md = metadata[metadata['SampleType'] + .isin(sample_types)].sort_values('Composting Time Point') md['Bucket'] = md['Bucket'].astype(float) - md['Week'] = md['Week'].astype(float) + md['Composting Time Point'] = md['Composting Time Point'].astype(float) buckets_md = md[md['Bucket'].between(1, 16)] # ALL SUBJECT FECAL SAMPLES: IDs -> XY ordination points - fecal_ids = list(set(buckets_md[buckets_md['Week'] == 0.0].index.values) & - set(ord_2d.index.values)) + fecal_ids = list( + set(buckets_md[buckets_md['SampleType'] == 'Human Excrement'] + .index.values) & + set(ord_2d.index.values)) x_fecal, y_fecal = _swap_axis(ord_2d, fecal_ids, swap_axes) # ALL SUBJECT BULKING MATERIAL: IDs -> XY ordination points bulking_ids = \ - list(set(md[md['SampleType2'] == 'Bulking Material'].index.values) & + list(set(md[md['SampleType'] == 'Bulking Material'].index.values) & set(ord_2d.index.values)) x_bulking, y_bulking = _swap_axis(ord_2d, bulking_ids, swap_axes) @@ -176,8 +178,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # (OPTIONAL) WEEKLY MEAN FOR ALL BUCKETS: IDs -> XY ordination points if average == 'True': - weeks_md = md[md['Week'].between(1, 52)] - weeks = list(set(weeks_md['Week'].values)) + weeks_md = md[md['Composting Time Point'].between(1, 52)] + weeks = list(set(weeks_md['Composting Time Point'].values)) # dicts for each week's mean x&y values bucket_weekly_avgs_x = {} @@ -188,9 +190,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, y_list = [] # filtering the md to only include post-roll sample types - weekly_bucket_ids = \ - md[(md['Week'] == week) & - (md['SampleType2'] == 'Compost Post-Roll')].index.values + weekly_bucket_ids = md[ + (md['Composting Time Point'] == week) & + (md['SampleType'] == 'Human Excrement Compost')].index.values # only use IDs that are present both in the md and ordination included_ids = [] @@ -233,8 +235,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, if not highlighted_buckets: # HE wk 0 mean HE_week0 = \ - md[(md['SampleType2'] == 'Self Sample') & - (md['Week'] == 0.0)].index.values + md[(md['SampleType'] == 'Human Excrement')].index.values ids_HE_week0 = [] for i in HE_week0: if i in ord_2d.index.values: @@ -245,8 +246,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # bulk wk 0 mean bulk_week0 = \ - md[(md['SampleType2'] == 'Bulking Material') & - (md['Week'] == 0.0)].index.values + md[(md['SampleType'] == 'Bulking Material')].index.values ids_bulk_week0 = [] for i in bulk_week0: if i in ord_2d.index.values: @@ -268,13 +268,14 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, x_buckets, y_buckets = _swap_axis(ord_2d, bucket_ids, swap_axes) # EMP SOILS - emp_ids = md.loc[md['Bucket'] == 0.0].index.values + emp_ids = md.loc[md['SampleType'] == 'Soil'].index.values x_emp, y_emp = _swap_axis(ord_2d, emp_ids, swap_axes) # FOOD COMPOST compost_ids = \ - list(set(md.loc[md['Bucket'] == 17.0].index.values) & - set(ord_2d.index.values)) + list(set(md.loc[(md['SampleType'] == 'Food Compost') | + (md['SampleType'] == 'Landscape Compost')].index.values) + & set(ord_2d.index.values)) x_compost, y_compost = _swap_axis(ord_2d, compost_ids, swap_axes) # (OPTIONAL SAMPLE TYPES) HIMALAYA @@ -424,7 +425,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # adding week annotations for each highlighted bucket if week_annotations == 'True': - for week, x, y in zip((md.loc[ids]['Week']), + for week, x, y in zip((md.loc[ids]['Composting Time Point']), x_bucket, y_bucket): week_int = int(week) ax.annotate(str(week_int), weight='bold', color='purple', From 0331476156d06eb2fa44c6ffa911ba86b5eb1d7b Mon Sep 17 00:00:00 2001 From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com> Date: Wed, 9 Oct 2024 06:46:55 -0700 Subject: [PATCH 03/13] Update gut_to_soil_manuscript_figures/_methods.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- gut_to_soil_manuscript_figures/_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index eef69ed..f2a0538 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -44,7 +44,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, swap_axes = str(swap_axes) himalaya = str(himalaya) pit_toilet = str(pit_toilet) - export_legend_str = str(export_legend) + export_legend = str(export_legend) plot_fp = os.path.join(output_dir, 'pcoa_plot.png') From 04f45c929be15f90f397258babb2ca3720d54c26 Mon Sep 17 00:00:00 2001 From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com> Date: Wed, 9 Oct 2024 06:47:08 -0700 Subject: [PATCH 04/13] Update gut_to_soil_manuscript_figures/_methods.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- gut_to_soil_manuscript_figures/_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index f2a0538..8711672 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -63,7 +63,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, swap_axes, himalaya, pit_toilet, - export_legend_str, + export_legend, highlighted_buckets, legend_fp ] From fffaaecf542f8cd6924bab7a0314de8edce68e50 Mon Sep 17 00:00:00 2001 From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com> Date: Wed, 9 Oct 2024 06:47:23 -0700 Subject: [PATCH 05/13] Update gut_to_soil_manuscript_figures/_methods.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- gut_to_soil_manuscript_figures/_methods.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index 8711672..2f1d97e 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -48,7 +48,6 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, plot_fp = os.path.join(output_dir, 'pcoa_plot.png') - legend_fp = os.path.join(output_dir, 'legend.png') command = [ 'python', script_path, From aabe60ffa2f9a068df2d78d88da2d31a705e6e59 Mon Sep 17 00:00:00 2001 From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com> Date: Wed, 9 Oct 2024 06:47:36 -0700 Subject: [PATCH 06/13] Update gut_to_soil_manuscript_figures/_methods.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- gut_to_soil_manuscript_figures/_methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index 2f1d97e..80c1cf5 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -79,7 +79,8 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,

2D PCoA Plot

PCoA Plot ''') - if export_legend: + if export_legend == 'True': + legend_fp = os.path.join(output_dir, 'legend.png') f.write('''

PCoA Plot legend From ac3f9921b83d6eb2994cad3782c76922a8fdf1d6 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Wed, 9 Oct 2024 06:56:37 -0700 Subject: [PATCH 07/13] legend_fp was referenced before assignment --- gut_to_soil_manuscript_figures/_methods.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index 80c1cf5..67a42d4 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -47,7 +47,8 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, export_legend = str(export_legend) plot_fp = os.path.join(output_dir, 'pcoa_plot.png') - + if export_legend == 'True': + legend_fp = os.path.join(output_dir, 'legend.png') command = [ 'python', script_path, @@ -80,7 +81,6 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, PCoA Plot ''') if export_legend == 'True': - legend_fp = os.path.join(output_dir, 'legend.png') f.write('''

PCoA Plot legend From c917fee98dd1f0bdb99ad219ced21279665b7007 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Wed, 9 Oct 2024 07:10:49 -0700 Subject: [PATCH 08/13] address issue with missing Week 0.0 checks --- .../scripts/plot_pcoa_2d.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index e32d123..e47de3c 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -63,7 +63,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d): # HE bucket_ids_HE_week0 = \ md[(md['Bucket'] == bucket) & - (md['SampleType'] == 'Human Excrement')].index.values + (md['SampleType'] == 'Human Excrement') & + (md['Composting Time Point']).isna()].index.values ids_HE_week0 = [] for i in bucket_ids_HE_week0: @@ -73,7 +74,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d): # bulking bucket_ids_bulk_week0 = \ md[(md['Bucket'] == bucket) & - (md['SampleType'] == 'Bulking Material')].index.values + (md['SampleType'] == 'Bulking Material') & + (md['Composting Time Point']).isna()].index.values ids_bulk_week0 = [] for i in bucket_ids_bulk_week0: @@ -235,7 +237,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, if not highlighted_buckets: # HE wk 0 mean HE_week0 = \ - md[(md['SampleType'] == 'Human Excrement')].index.values + md[(md['SampleType'] == 'Human Excrement') & + (md['Composting Time Point']).isna()].index.values ids_HE_week0 = [] for i in HE_week0: if i in ord_2d.index.values: @@ -246,7 +249,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # bulk wk 0 mean bulk_week0 = \ - md[(md['SampleType'] == 'Bulking Material')].index.values + md[(md['SampleType'] == 'Bulking Material') & + (md['Composting Time Point']).isna()].index.values ids_bulk_week0 = [] for i in bulk_week0: if i in ord_2d.index.values: From 1e179a6fc0a1fecaa8452573f1269ededcb47014 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Tue, 15 Oct 2024 10:45:23 -0600 Subject: [PATCH 09/13] add debugging information --- .../scripts/plot_pcoa_2d.py | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index e47de3c..101f52c 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -113,11 +113,13 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, ord_rslts = skbio.OrdinationResults.read(str(ordination_fp)) ord_2d = ord_rslts.samples.iloc[:, 0:2] + print(f"\n***\nSamples in ordination (n={ord_2d.shape[0]})\n***\n") metadata_in = pd.read_csv(str(metadata_fp), sep='\t').set_index('sample-id') # filtering metadata to only include samples w/IDs present in ordination metadata = metadata_in.loc[ord_2d.index.values] + print(metadata['SampleType'].value_counts()) # setting XY labels based on swap axis & # figure aspect based on proportion explained @@ -306,18 +308,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, fecal_scatter = \ plt.scatter(x=x_fecal, y=y_fecal, facecolors='none', edgecolors='tab:brown', - label='HE (other buckets)') + label=f'HE (other buckets) (n={len(x_fecal)})') # Bulking Material - all subjects bulking_scatter = \ plt.scatter(x=x_bulking, y=y_bulking, facecolors='none', - edgecolors='g', label='Bulking Material (other buckets)') + edgecolors='g', + label=f'Bulking Material (other buckets) (n={len(x_bulking)})') # All buckets (minus highlighted bucket(s)) all_sample_buckets = \ plt.scatter(x=x_buckets, y=y_buckets, facecolors='none', edgecolors='#C5C9C7', marker='^', - label='HEC (other buckets)') + label=f'HEC (other buckets) (n={len(x_buckets)})') # (OPTIONAL) Weekly Mean for all Buckets if average == 'True': @@ -325,7 +328,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=bucket_weekly_avgs_x.values(), y=bucket_weekly_avgs_y.values(), marker='*', facecolors='#1f77b4', - s=100, label='HEC (Weekly Mean)') + s=100, + label=f'HEC (Weekly Mean) (n={len(bucket_weekly_avgs_x)})') # adding HE mean if only plotting the weekly mean # (w/o any highlighted bucket(s)) @@ -335,33 +339,36 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=x0_HE_mean, y=y0_HE_mean, marker='*', s=150, zorder=1, facecolors='tab:brown', edgecolors='k', - label='HE (Weekly Mean)') + label=f'HE (Weekly Mean) (n={len(x0_HE_mean)})') # bulking bulk_week0_scatter = \ plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean, marker='*', s=150, zorder=1, facecolors='g', edgecolors='k', - label='Bulking Material (Weekly Mean)') + label=f'Bulking Material (Weekly Mean) (n={len(x0_bulk_mean)})') # EMP Soil emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp, - facecolors='k', label='Soil') + facecolors='k', + label=f'Soil (n={len(x_emp)})') # Food Compost food_compost_scatter = plt.scatter(x=x_compost, y=y_compost, facecolors='r', - label='FLWC') + label=f'FLWC (n={len(x_compost)})') # (OPTIONAL SAMPLE TYPES) Himalaya if himalaya == 'True': himalaya_scatter = plt.scatter(x=x_hima, y=y_hima, - facecolors='b', label='Himalaya') + facecolors='b', + label=f'Himalaya (n={len(x_hima)})') # (OPTIONAL SAMPLE TYPES) Pit Toilet if pit_toilet == 'True': pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt, - facecolors='y', label='Pit Toilet') + facecolors='y', + label=f'Pit Toilet (n={len(x_pt)})') # collecting the handle info to add to the legend bucket_handles = [] @@ -405,12 +412,12 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, HE_week0_scatter = \ plt.scatter(x=x0_HE, y=y0_HE, facecolors='tab:brown', - label=f'HE (Bucket #{bucket})') + label=f'HE (Bucket #{bucket}) (n={len(x0_HE)})') bucket_handles.append(HE_week0_scatter) bulk_week0_scatter = \ plt.scatter(x=x0_bulk, y=y0_bulk, facecolors='g', - label=f'Bulking Material (Bucket #{bucket})') + label=f'Bulking Material (Bucket #{bucket}) (n={len(x0_bulk)})') bucket_handles.append(bulk_week0_scatter) for bucket, ids in buckets_dict.items(): @@ -422,7 +429,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=x_bucket, y=y_bucket, facecolors=viridis(color_dict[bucket]), edgecolors='k', marker='^', - label=f'HEC (Bucket #{bucket})') + label=f'HEC (Bucket #{bucket}) (n={len(x_bucket)})') bucket_handles.append(highlighted_bucket_scatter) bucket_nums.append(bucket) From 3d77c78ad0f5ef6b83a6e90d4d75faf6810687e9 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Thu, 17 Oct 2024 09:23:25 -0600 Subject: [PATCH 10/13] working --- gut_to_soil_manuscript_figures/_methods.py | 3 +-- .../scripts/plot_pcoa_2d.py | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py index 67a42d4..0e4f1a1 100644 --- a/gut_to_soil_manuscript_figures/_methods.py +++ b/gut_to_soil_manuscript_figures/_methods.py @@ -47,8 +47,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata, export_legend = str(export_legend) plot_fp = os.path.join(output_dir, 'pcoa_plot.png') - if export_legend == 'True': - legend_fp = os.path.join(output_dir, 'legend.png') + legend_fp = os.path.join(output_dir, 'legend.png') command = [ 'python', script_path, diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index 101f52c..9a997b3 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -120,6 +120,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # filtering metadata to only include samples w/IDs present in ordination metadata = metadata_in.loc[ord_2d.index.values] print(metadata['SampleType'].value_counts()) + xyz = metadata[metadata['SampleType'] == 'Human Excrement Compost'].index # setting XY labels based on swap axis & # figure aspect based on proportion explained @@ -262,11 +263,10 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, y0_bulk_mean = np.mean(y0_bulk) # ALL BUCKETS (minus highlighted bucket(s)) - all_bucket_ids_w_fecal = \ + all_bucket_ids = \ list(set(md[md['Bucket'].between(1, 16)].index.values) & + set(md[md['SampleType'] == 'Human Excrement Compost'].index.values) & set(ord_2d.index.values)) - all_bucket_ids = list(set(all_bucket_ids_w_fecal) - set(fecal_ids)) - if highlighted_buckets: bucket_ids = list(set(all_bucket_ids) - bucket_set) else: @@ -329,7 +329,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, y=bucket_weekly_avgs_y.values(), marker='*', facecolors='#1f77b4', s=100, - label=f'HEC (Weekly Mean) (n={len(bucket_weekly_avgs_x)})') + label=f'HEC (Weekly Mean)') # adding HE mean if only plotting the weekly mean # (w/o any highlighted bucket(s)) @@ -339,14 +339,14 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=x0_HE_mean, y=y0_HE_mean, marker='*', s=150, zorder=1, facecolors='tab:brown', edgecolors='k', - label=f'HE (Weekly Mean) (n={len(x0_HE_mean)})') + label=f'HE (Weekly Mean)') # bulking bulk_week0_scatter = \ plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean, marker='*', s=150, zorder=1, facecolors='g', edgecolors='k', - label=f'Bulking Material (Weekly Mean) (n={len(x0_bulk_mean)})') + label=f'Bulking Material (Weekly Mean)') # EMP Soil emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp, From 77edf76ab4c2337c24ccf61bab6b5f0319e6f3dd Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Thu, 17 Oct 2024 09:27:23 -0600 Subject: [PATCH 11/13] remove debugging statements --- .../scripts/plot_pcoa_2d.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index 9a997b3..8d5c34a 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -113,14 +113,11 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, ord_rslts = skbio.OrdinationResults.read(str(ordination_fp)) ord_2d = ord_rslts.samples.iloc[:, 0:2] - print(f"\n***\nSamples in ordination (n={ord_2d.shape[0]})\n***\n") metadata_in = pd.read_csv(str(metadata_fp), sep='\t').set_index('sample-id') # filtering metadata to only include samples w/IDs present in ordination metadata = metadata_in.loc[ord_2d.index.values] - print(metadata['SampleType'].value_counts()) - xyz = metadata[metadata['SampleType'] == 'Human Excrement Compost'].index # setting XY labels based on swap axis & # figure aspect based on proportion explained @@ -308,19 +305,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, fecal_scatter = \ plt.scatter(x=x_fecal, y=y_fecal, facecolors='none', edgecolors='tab:brown', - label=f'HE (other buckets) (n={len(x_fecal)})') + label=f'HE (other buckets)') # Bulking Material - all subjects bulking_scatter = \ plt.scatter(x=x_bulking, y=y_bulking, facecolors='none', edgecolors='g', - label=f'Bulking Material (other buckets) (n={len(x_bulking)})') + label=f'Bulking Material (other buckets)') # All buckets (minus highlighted bucket(s)) all_sample_buckets = \ plt.scatter(x=x_buckets, y=y_buckets, facecolors='none', edgecolors='#C5C9C7', marker='^', - label=f'HEC (other buckets) (n={len(x_buckets)})') + label=f'HEC (other buckets)') # (OPTIONAL) Weekly Mean for all Buckets if average == 'True': @@ -351,24 +348,24 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # EMP Soil emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp, facecolors='k', - label=f'Soil (n={len(x_emp)})') + label=f'Soil') # Food Compost food_compost_scatter = plt.scatter(x=x_compost, y=y_compost, facecolors='r', - label=f'FLWC (n={len(x_compost)})') + label=f'FLWC') # (OPTIONAL SAMPLE TYPES) Himalaya if himalaya == 'True': himalaya_scatter = plt.scatter(x=x_hima, y=y_hima, facecolors='b', - label=f'Himalaya (n={len(x_hima)})') + label=f'Himalaya') # (OPTIONAL SAMPLE TYPES) Pit Toilet if pit_toilet == 'True': pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt, facecolors='y', - label=f'Pit Toilet (n={len(x_pt)})') + label=f'Pit Toilet') # collecting the handle info to add to the legend bucket_handles = [] @@ -412,12 +409,12 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, HE_week0_scatter = \ plt.scatter(x=x0_HE, y=y0_HE, facecolors='tab:brown', - label=f'HE (Bucket #{bucket}) (n={len(x0_HE)})') + label=f'HE (Bucket #{bucket})') bucket_handles.append(HE_week0_scatter) bulk_week0_scatter = \ plt.scatter(x=x0_bulk, y=y0_bulk, facecolors='g', - label=f'Bulking Material (Bucket #{bucket}) (n={len(x0_bulk)})') + label=f'Bulking Material (Bucket #{bucket})') bucket_handles.append(bulk_week0_scatter) for bucket, ids in buckets_dict.items(): @@ -429,7 +426,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=x_bucket, y=y_bucket, facecolors=viridis(color_dict[bucket]), edgecolors='k', marker='^', - label=f'HEC (Bucket #{bucket}) (n={len(x_bucket)})') + label=f'HEC (Bucket #{bucket})') bucket_handles.append(highlighted_bucket_scatter) bucket_nums.append(bucket) From b86893aabf1f3cdc8f3dcd0e1b42290c266ffa38 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Thu, 17 Oct 2024 09:31:22 -0600 Subject: [PATCH 12/13] lint --- .../scripts/plot_pcoa_2d.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index 8d5c34a..1992e79 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -261,9 +261,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, # ALL BUCKETS (minus highlighted bucket(s)) all_bucket_ids = \ - list(set(md[md['Bucket'].between(1, 16)].index.values) & - set(md[md['SampleType'] == 'Human Excrement Compost'].index.values) & - set(ord_2d.index.values)) + list(set(md[md['Bucket'].between(1, 16)].index) & + set(md[md['SampleType'] == 'Human Excrement Compost'].index) & + set(ord_2d.index)) if highlighted_buckets: bucket_ids = list(set(all_bucket_ids) - bucket_set) else: @@ -305,19 +305,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, fecal_scatter = \ plt.scatter(x=x_fecal, y=y_fecal, facecolors='none', edgecolors='tab:brown', - label=f'HE (other buckets)') + label='HE (other buckets)') # Bulking Material - all subjects bulking_scatter = \ plt.scatter(x=x_bulking, y=y_bulking, facecolors='none', edgecolors='g', - label=f'Bulking Material (other buckets)') + label='Bulking Material (other buckets)') # All buckets (minus highlighted bucket(s)) all_sample_buckets = \ plt.scatter(x=x_buckets, y=y_buckets, facecolors='none', edgecolors='#C5C9C7', marker='^', - label=f'HEC (other buckets)') + label='HEC (other buckets)') # (OPTIONAL) Weekly Mean for all Buckets if average == 'True': @@ -326,7 +326,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, y=bucket_weekly_avgs_y.values(), marker='*', facecolors='#1f77b4', s=100, - label=f'HEC (Weekly Mean)') + label='HEC (Weekly Mean)') # adding HE mean if only plotting the weekly mean # (w/o any highlighted bucket(s)) @@ -336,36 +336,36 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, plt.scatter(x=x0_HE_mean, y=y0_HE_mean, marker='*', s=150, zorder=1, facecolors='tab:brown', edgecolors='k', - label=f'HE (Weekly Mean)') + label='HE (Weekly Mean)') # bulking bulk_week0_scatter = \ plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean, marker='*', s=150, zorder=1, facecolors='g', edgecolors='k', - label=f'Bulking Material (Weekly Mean)') + label='Bulking Material (Weekly Mean)') # EMP Soil emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp, facecolors='k', - label=f'Soil') + label='Soil') # Food Compost food_compost_scatter = plt.scatter(x=x_compost, y=y_compost, facecolors='r', - label=f'FLWC') + label='FLWC') # (OPTIONAL SAMPLE TYPES) Himalaya if himalaya == 'True': himalaya_scatter = plt.scatter(x=x_hima, y=y_hima, facecolors='b', - label=f'Himalaya') + label='Himalaya') # (OPTIONAL SAMPLE TYPES) Pit Toilet if pit_toilet == 'True': pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt, facecolors='y', - label=f'Pit Toilet') + label='Pit Toilet') # collecting the handle info to add to the legend bucket_handles = [] From 0dcf1cdb1ce92b71d2e18d7f58111ca375bdba09 Mon Sep 17 00:00:00 2001 From: Liz Gehret Date: Thu, 17 Oct 2024 15:41:48 -0600 Subject: [PATCH 13/13] improve figure title --- gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py index 1992e79..3c796cc 100644 --- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py +++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py @@ -484,9 +484,16 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure, ax.plot([x0_bulk_mean, x1_mean], [y0_bulk_mean, y1_mean], '--', color='#C5C9C7', linewidth=0.75, zorder=2) + # handling title text for buckets depending on bucket highlighting + if len(bucket_nums) == 0: + bucket_title_text = f'{measure} (all Buckets)' + elif len(bucket_nums) == 1: + bucket_title_text = f'{measure} for Bucket {bucket_nums}' + elif len(bucket_nums) > 1: + bucket_title_text = f'{measure} for Buckets {sorted(bucket_nums)}' # Adding title, labels & legend details plt.gca().set(xlabel=f'PCoA {x_label}', ylabel=f'PCoA {y_label}', - title=f'{measure} for Bucket(s) {bucket_nums}', + title=f'{bucket_title_text}', label='Bucket#') # Helper method for exporting legend as a separate figure