Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates for metadata and manuscript finalization #2

Closed
wants to merge 8 commits into from
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Your first step will be filtering the distance matrix you'd like to use for the
qiime diversity filter-distance-matrix \
--i-distance-matrix unweighted-unifrac-distance-matrix.qza \
--m-metadata-file final-analysis-metadata.tsv \
--p-where "[SampleType2] IN ('EMP-Soils', 'Food-Compost', 'Self Sample', 'Compost Post-Roll', 'Bulking Material')" \
--p-where "[SampleType] IN ('Soil', 'Food Compost', 'Landscape Compost', 'Human Excrement', 'Human Excrement Compost', 'Bulking Material')" \
--o-filtered-distance-matrix filtered-unweighted-unifrac-distance-matrix.qza
```

Expand All @@ -95,7 +95,7 @@ Now we're ready to generate a pcoa plot!
qiime gut-to-soil-manuscript-figures pcoa-2d \
--i-ordination filtered-unweighted-unifrac-2d-pcoa.qza \
--m-metadata-file final-analysis-metadata.tsv \
--p-measure 'Unweighted Unifrac' \
--p-measure 'Unweighted UniFrac' \
--p-average \
--p-export-legend \
--p-highlighted-buckets '3, 4' \
Expand Down
14 changes: 10 additions & 4 deletions gut_to_soil_manuscript_figures/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,11 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
swap_axes = str(swap_axes)
himalaya = str(himalaya)
pit_toilet = str(pit_toilet)
export_legend = str(export_legend)
export_legend_str = str(export_legend)
gregcaporaso marked this conversation as resolved.
Show resolved Hide resolved

plot_fp = os.path.join(output_dir, 'pcoa_plot.png')

if export_legend:
legend_fp = os.path.join(output_dir, 'legend.png')
legend_fp = os.path.join(output_dir, 'legend.png')
gregcaporaso marked this conversation as resolved.
Show resolved Hide resolved

command = [
'python', script_path,
Expand All @@ -64,7 +63,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
swap_axes,
himalaya,
pit_toilet,
export_legend,
export_legend_str,
gregcaporaso marked this conversation as resolved.
Show resolved Hide resolved
highlighted_buckets,
legend_fp
]
Expand All @@ -80,6 +79,13 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
<body>
<h1>2D PCoA Plot</h1>
<img src="pcoa_plot.png" alt="PCoA Plot">
''')
if export_legend:
gregcaporaso marked this conversation as resolved.
Show resolved Hide resolved
f.write('''
<p>
<img src="legend.png" alt="PCoA Plot legend">
''')
f.write('''
</body>
</html>
''')
61 changes: 31 additions & 30 deletions gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
# connecting time series data in order
md_bucket_sorted = \
md[(md['Bucket'] == bucket) &
(md['SampleType2'] == 'Compost Post-Roll')].sort_values('Week')
(md['SampleType'] == 'Human Excrement Compost')].\
sort_values('Composting Time Point')

# week 1-52 IDs for selected bucket
bucket_ids_sorted = \
md_bucket_sorted[md_bucket_sorted['Week'] > 0.0].index.values
bucket_ids_sorted = md_bucket_sorted[
md_bucket_sorted['Composting Time Point'] > 0.0].index.values

# making sure bucket IDs used are only ones that are present in both
# the md and ordination results
Expand All @@ -62,8 +63,7 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
# HE
bucket_ids_HE_week0 = \
md[(md['Bucket'] == bucket) &
(md['SampleType2'] == 'Self Sample') &
(md['Week'] == 0.0)].index.values
(md['SampleType'] == 'Human Excrement')].index.values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the (md['Week'] == 0.0)].index.values got removed. Was this intentional?


ids_HE_week0 = []
for i in bucket_ids_HE_week0:
Expand All @@ -73,17 +73,16 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
# bulking
bucket_ids_bulk_week0 = \
md[(md['Bucket'] == bucket) &
(md['SampleType2'] == 'Bulking Material') &
(md['Week'] == 0.0)].index.values
(md['SampleType'] == 'Bulking Material')].index.values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above, it looks like (md['Week'] == 0.0)].index.values was removed here as well.


ids_bulk_week0 = []
for i in bucket_ids_bulk_week0:
if i in ord_2d.index.values:
ids_bulk_week0.append(i)

# week 1 i.e. end points for dotted line connecting HE & BM -> HEC
bucket_ids_HEC_week1 = \
md_bucket_sorted[md_bucket_sorted['Week'] == 1.0].index.values
bucket_ids_HEC_week1 = md_bucket_sorted[
md_bucket_sorted['Composting Time Point'] == 1.0].index.values

ids_HEC_week1 = []
for i in bucket_ids_HEC_week1:
Expand Down Expand Up @@ -141,8 +140,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
ord_2d[1] = ord_2d[1].multiply(-1)

# allowed sample types to be pulled from the md
sample_types = ['EMP-Soils', 'Food-Compost', 'Self Sample',
'Compost Post-Roll', 'Bulking Material']
sample_types = ['Soil', 'Food Compost', 'Landscape Compost',
'Human Excrement', 'Human Excrement Compost',
'Bulking Material']

# if using himalaya and/or pit toilet data
if himalaya == 'True':
Expand All @@ -151,21 +151,23 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
sample_types.append('Pit Toilet')

# sorting the filtered md (by allowed sample types) by week
md = metadata[metadata['SampleType2']
.isin(sample_types)].sort_values('Week')
md = metadata[metadata['SampleType']
.isin(sample_types)].sort_values('Composting Time Point')
md['Bucket'] = md['Bucket'].astype(float)
md['Week'] = md['Week'].astype(float)
md['Composting Time Point'] = md['Composting Time Point'].astype(float)

buckets_md = md[md['Bucket'].between(1, 16)]

# ALL SUBJECT FECAL SAMPLES: IDs -> XY ordination points
fecal_ids = list(set(buckets_md[buckets_md['Week'] == 0.0].index.values) &
set(ord_2d.index.values))
fecal_ids = list(
set(buckets_md[buckets_md['SampleType'] == 'Human Excrement']
.index.values) &
set(ord_2d.index.values))
x_fecal, y_fecal = _swap_axis(ord_2d, fecal_ids, swap_axes)

# ALL SUBJECT BULKING MATERIAL: IDs -> XY ordination points
bulking_ids = \
list(set(md[md['SampleType2'] == 'Bulking Material'].index.values) &
list(set(md[md['SampleType'] == 'Bulking Material'].index.values) &
set(ord_2d.index.values))
x_bulking, y_bulking = _swap_axis(ord_2d, bulking_ids, swap_axes)

Expand All @@ -176,8 +178,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,

# (OPTIONAL) WEEKLY MEAN FOR ALL BUCKETS: IDs -> XY ordination points
if average == 'True':
weeks_md = md[md['Week'].between(1, 52)]
weeks = list(set(weeks_md['Week'].values))
weeks_md = md[md['Composting Time Point'].between(1, 52)]
weeks = list(set(weeks_md['Composting Time Point'].values))

# dicts for each week's mean x&y values
bucket_weekly_avgs_x = {}
Expand All @@ -188,9 +190,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
y_list = []

# filtering the md to only include post-roll sample types
weekly_bucket_ids = \
md[(md['Week'] == week) &
(md['SampleType2'] == 'Compost Post-Roll')].index.values
weekly_bucket_ids = md[
(md['Composting Time Point'] == week) &
(md['SampleType'] == 'Human Excrement Compost')].index.values

# only use IDs that are present both in the md and ordination
included_ids = []
Expand Down Expand Up @@ -233,8 +235,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
if not highlighted_buckets:
# HE wk 0 mean
HE_week0 = \
md[(md['SampleType2'] == 'Self Sample') &
(md['Week'] == 0.0)].index.values
md[(md['SampleType'] == 'Human Excrement')].index.values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment re: (md['Week'] == 0.0)].index.values

ids_HE_week0 = []
for i in HE_week0:
if i in ord_2d.index.values:
Expand All @@ -245,8 +246,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,

# bulk wk 0 mean
bulk_week0 = \
md[(md['SampleType2'] == 'Bulking Material') &
(md['Week'] == 0.0)].index.values
md[(md['SampleType'] == 'Bulking Material')].index.values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment re: (md['Week'] == 0.0)].index.values

ids_bulk_week0 = []
for i in bulk_week0:
if i in ord_2d.index.values:
Expand All @@ -268,13 +268,14 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
x_buckets, y_buckets = _swap_axis(ord_2d, bucket_ids, swap_axes)

# EMP SOILS
emp_ids = md.loc[md['Bucket'] == 0.0].index.values
emp_ids = md.loc[md['SampleType'] == 'Soil'].index.values
x_emp, y_emp = _swap_axis(ord_2d, emp_ids, swap_axes)

# FOOD COMPOST
compost_ids = \
list(set(md.loc[md['Bucket'] == 17.0].index.values) &
set(ord_2d.index.values))
list(set(md.loc[(md['SampleType'] == 'Food Compost') |
(md['SampleType'] == 'Landscape Compost')].index.values)
& set(ord_2d.index.values))
x_compost, y_compost = _swap_axis(ord_2d, compost_ids, swap_axes)

# (OPTIONAL SAMPLE TYPES) HIMALAYA
Expand Down Expand Up @@ -424,7 +425,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,

# adding week annotations for each highlighted bucket
if week_annotations == 'True':
for week, x, y in zip((md.loc[ids]['Week']),
for week, x, y in zip((md.loc[ids]['Composting Time Point']),
x_bucket, y_bucket):
week_int = int(week)
ax.annotate(str(week_int), weight='bold', color='purple',
Expand Down
Loading