From 9eaf08d6ce036c4ed3439d1dce83af42ca80be08 Mon Sep 17 00:00:00 2001 From: fefossa Date: Fri, 23 Jun 2023 12:01:40 -0400 Subject: [PATCH 1/5] collate accept less compartments --- pycytominer/cyto_utils/collate.py | 19 +++++++++++++++++-- pycytominer/cyto_utils/collate_cmd.py | 24 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py index 58532dbb..95fe59c7 100644 --- a/pycytominer/cyto_utils/collate.py +++ b/pycytominer/cyto_utils/collate.py @@ -32,6 +32,9 @@ def collate( add_image_features=True, image_feature_categories=["Granularity", "Texture", "ImageQuality", "Threshold"], printtoscreen=True, + no_nuclei=True, + no_cells=True, + no_cytoplasm=True ): """Collate the CellProfiler-created CSVs into a single SQLite file by calling cytominer-database @@ -68,6 +71,12 @@ def collate( """ from pycytominer.cyto_utils.cells import SingleCells + + + # Set up compartments based on the flags. 
If all True, all compartments will be used + filter_compartments = [no_nuclei, no_cells, no_cytoplasm] + to_filter = ["Nuclei", "Cells", "Cytoplasm"] + compartments = [to_filter[i] for i in [j for j in range(len(filter_compartments)) if filter_compartments[j]]] # Set up directories (these need to be abspaths to keep from confusing makedirs later) input_dir = pathlib.Path(f"{base_directory}/analysis/{batch}/{plate}/{csv_dir}") @@ -97,8 +106,13 @@ def collate( remote_backend_file = f"{aws_remote}/backend/{batch}/{plate}/{plate}.sqlite" remote_aggregated_file = f"{aws_remote}/backend/{batch}/{plate}/{plate}.csv" + + include_list = [] + for eachcompartment in compartments: + include = "--include */" + eachcompartment + ".csv" + include_list.append(include) + sync_cmd = f"aws s3 sync --exclude * {(' '.join(include_list))} --include */Image.csv {remote_input_dir} {input_dir}" - sync_cmd = f"aws s3 sync --exclude * --include */Cells.csv --include */Nuclei.csv --include */Cytoplasm.csv --include */Image.csv {remote_input_dir} {input_dir}" if printtoscreen: print(f"Downloading CSVs from {remote_input_dir} to {input_dir}") run_check_errors(sync_cmd) @@ -142,7 +156,7 @@ def collate( "CREATE INDEX IF NOT EXISTS table_image_idx ON Image(TableNumber, ImageNumber);", ] run_check_errors(index_cmd_img) - for eachcompartment in ["Cells", "Cytoplasm", "Nuclei"]: + for eachcompartment in compartments: index_cmd_compartment = [ "sqlite3", cache_backend_file, @@ -202,6 +216,7 @@ def collate( aggregation_operation="mean", add_image_features=add_image_features, image_feature_categories=image_feature_categories, + compartments=[cmp.lower() for cmp in compartments] ) database.aggregate_profiles(output_file=aggregated_file) diff --git a/pycytominer/cyto_utils/collate_cmd.py b/pycytominer/cyto_utils/collate_cmd.py index 0ccfcb9c..f9a54bd1 100644 --- a/pycytominer/cyto_utils/collate_cmd.py +++ b/pycytominer/cyto_utils/collate_cmd.py @@ -76,6 +76,27 @@ default=True, help="Whether to print 
status updates", ) + parser.add_argument( + "--no-nuclei", + dest="no_nuclei", + action="store_false", + default=True, + help="Whether or not to use nuclei objects", + ) + parser.add_argument( + "--no-cells", + dest="no_cells", + action="store_false", + default=True, + help="Whether or not to use cells objects", + ) + parser.add_argument( + "--no-cytoplasm", + dest="no_cytoplasm", + action="store_false", + default=True, + help="Whether or not to use cytoplasm objects", + ) args = parser.parse_args() @@ -94,4 +115,7 @@ add_image_features=args.add_image_features, image_feature_categories=args.image_feature_categories, printtoscreen=args.printtoscreen, + no_nuclei=args.no_nuclei, + no_cells=args.no_cells, + no_cytoplasm=args.no_cytoplasm ) From fe710f76c5b6ffeeffdc49ec582bb2b7aa749853 Mon Sep 17 00:00:00 2001 From: fefossa Date: Fri, 23 Jun 2023 12:03:33 -0400 Subject: [PATCH 2/5] check linking_cols only if more than 1 compartment --- .../cyto_utils/single_cell_ingest_utils.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/pycytominer/cyto_utils/single_cell_ingest_utils.py b/pycytominer/cyto_utils/single_cell_ingest_utils.py index 26812ce5..d1a232b5 100644 --- a/pycytominer/cyto_utils/single_cell_ingest_utils.py +++ b/pycytominer/cyto_utils/single_cell_ingest_utils.py @@ -51,22 +51,23 @@ def assert_linking_cols_complete(linking_cols="default", compartments="default") comp_err = "compartment not found. 
Check the specified compartments" - linking_check = [] - unique_linking_cols = [] - for x in linking_cols: - unique_linking_cols.append(x) - assert x in compartments, "{com} {err}".format(com=x, err=comp_err) - for y in linking_cols[x]: - unique_linking_cols.append(y) - assert y in compartments, "{com} {err}".format(com=y, err=comp_err) - linking_check.append("-".join(sorted([x, y]))) - - # Make sure that each combination has been specified exactly twice - linking_counter = Counter(linking_check) - for combo in linking_counter: - assert ( - linking_counter[combo] == 2 - ), "Missing column identifier between {combo}".format(combo=combo) + if not len(compartments) == 1: + linking_check = [] + unique_linking_cols = [] + for x in linking_cols: + unique_linking_cols.append(x) + assert x in compartments, "{com} {err}".format(com=x, err=comp_err) + for y in linking_cols[x]: + unique_linking_cols.append(y) + assert y in compartments, "{com} {err}".format(com=y, err=comp_err) + linking_check.append("-".join(sorted([x, y]))) + + # Make sure that each combination has been specified exactly twice + linking_counter = Counter(linking_check) + for combo in linking_counter: + assert ( + linking_counter[combo] == 2 + ), "Missing column identifier between {combo}".format(combo=combo) # Confirm that every compartment has been specified in the linking_cols unique_linking_cols = sorted(list(set(unique_linking_cols))) From 95a617e7cb8aa11912ccc65df3d540b6c5a1a2c9 Mon Sep 17 00:00:00 2001 From: fefossa Date: Mon, 26 Jun 2023 09:18:46 -0400 Subject: [PATCH 3/5] only check linking cols if len(compartments)>1 --- .../cyto_utils/single_cell_ingest_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pycytominer/cyto_utils/single_cell_ingest_utils.py b/pycytominer/cyto_utils/single_cell_ingest_utils.py index d1a232b5..abd71c87 100644 --- a/pycytominer/cyto_utils/single_cell_ingest_utils.py +++ b/pycytominer/cyto_utils/single_cell_ingest_utils.py @@ 
-69,14 +69,14 @@ def assert_linking_cols_complete(linking_cols="default", compartments="default") linking_counter[combo] == 2 ), "Missing column identifier between {combo}".format(combo=combo) - # Confirm that every compartment has been specified in the linking_cols - unique_linking_cols = sorted(list(set(unique_linking_cols))) - diff_column = set(compartments).difference(unique_linking_cols) - assert unique_linking_cols == sorted( - compartments - ), "All compartments must be specified in the linking_cols, {miss} is missing".format( - miss=diff_column - ) + # Confirm that every compartment has been specified in the linking_cols + unique_linking_cols = sorted(list(set(unique_linking_cols))) + diff_column = set(compartments).difference(unique_linking_cols) + assert unique_linking_cols == sorted( + compartments + ), "All compartments must be specified in the linking_cols, {miss} is missing".format( + miss=diff_column + ) def provide_linking_cols_feature_name_update(linking_cols="default"): From ee582412d9f8951a8d930407be7a119ca206c1ab Mon Sep 17 00:00:00 2001 From: fefossa Date: Mon, 26 Jun 2023 09:29:00 -0400 Subject: [PATCH 4/5] documentation collate --- pycytominer/cyto_utils/collate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py index 95fe59c7..9f41e1fb 100644 --- a/pycytominer/cyto_utils/collate.py +++ b/pycytominer/cyto_utils/collate.py @@ -68,6 +68,12 @@ def collate( The list of image feature groups to be used by add_image_features during aggregation printtoscreen: bool, optional, default True Whether or not to print output to the terminal + no_nuclei: bool, optional, default True + Whether or not the nuclei object is available. Set to False if there is no nuclei object. + no_cells: bool, optional, default True + Whether or not the cells object is available. Set to False if there is no cells object. 
+ no_cytoplasm: bool, optional, default True + Whether or not the cytoplasm object is available. Set to False if there is no cytoplasm object. """ from pycytominer.cyto_utils.cells import SingleCells From 529ae4fd81db9bebb3975c1e39bf4653d7943513 Mon Sep 17 00:00:00 2001 From: fefossa Date: Mon, 26 Jun 2023 09:29:15 -0400 Subject: [PATCH 5/5] documentation linking_cols --- pycytominer/cyto_utils/cells.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index f4954150..36e4b2c8 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -42,7 +42,7 @@ class SingleCells(object): compartments : list of str, default ["cells", "cytoplasm", "nuclei"] List of compartments to process. compartment_linking_cols : dict, default noted below - Dictionary identifying how to merge columns across tables. + Dictionary identifying how to merge columns across tables. For examples see note below. merge_cols : list of str, default ["TableNumber", "ImageNumber"] Columns indicating how to merge image and compartment data. image_cols : list of str, default ["TableNumber", "ImageNumber", "Metadata_Site"] @@ -89,7 +89,14 @@ class SingleCells(object): }, "cells": {"cytoplasm": "ObjectNumber"}, "nuclei": {"cytoplasm": "ObjectNumber"}, - } + }. + + The compartment_linking_cols dictionary template is: { + "child": + {"parent":"child_Parent_parent"}, + "parent": + {"child":"ObjectNumber"} + } """ def __init__(