From b7f9702212b93efb8838dfd24b76396ce899e9b8 Mon Sep 17 00:00:00 2001 From: Leila Mansouri <48998340+l-mansouri@users.noreply.github.com> Date: Mon, 20 Nov 2023 17:20:27 +0100 Subject: [PATCH] Changes test data (#11) * changed configurations to fit with standard * changed channel factoring * remove extra line * fix param name * fix param name * changed param name * changed channel factoring * fixes bug * fixes typo * fixes typo * fix bug * fix bug * fix bug * fix bug * fixes typo --- README.md | 8 ++++++-- conf/data/data.config | 15 +++++++++++++-- conf/family_test.config | 7 ------- conf/multi_hpo_test.config | 7 ------- conf/single_vcf_test.config | 8 -------- conf/tests/ci/ci_test_data.config | 5 +---- conf/tests/full/test_full.config | 2 +- conf/tests/full/test_full_family.config | 2 +- conf/tests/full/test_full_multi_hpo.config | 2 +- conf/tests/full/test_full_single_vcf.config | 2 +- main.nf | 12 ++++++++++-- nextflow.config | 14 ++++++++------ 12 files changed, 42 insertions(+), 42 deletions(-) delete mode 100644 conf/family_test.config delete mode 100644 conf/multi_hpo_test.config delete mode 100644 conf/single_vcf_test.config diff --git a/README.md b/README.md index 6a6a808..b06f3e9 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,13 @@ This is a file needed by exomiser to run. It contains information on where to fi This is a file needed by exomiser to run. It contains placeholders in the text that get filled in by the second process of the pipeline just before running exomiser. The one used for testing can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-nf/auto_config.yml) -### --exomiser_data +### --exomiser_profile_files -This path refers to the reference data bundle needed by exomiser (~120 GB!). A copy of such files can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-data-bundle/) . 
The reference dataset has been added as a parameter, allowing flexibility to pull the data from any resource (i.e. cloud, local storage, ftp, ...) and Nextflow will automatically take care of fetching the data without having to add anything to the pipeline itself. +This parameter selects which reference data set exomiser runs against. It accepts "test" or "full" (the default is "full"). + +The "full" profile points to the reference data bundle needed by exomiser (~120 GB!). A copy of these files can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-data-bundle/). The location of the reference data can also be set explicitly with `--data_bundle`, which takes precedence over the profile; this gives the flexibility to pull the data from any resource (e.g. cloud, local storage, ftp, ...) and Nextflow will automatically take care of fetching the data without having to add anything to the pipeline itself. + +The "test" profile points to some mock data used in testing. There are other parameters that can be tweaked to personalize the behaviour of the pipeline. 
These are referenced in `nextflow.config` diff --git a/conf/data/data.config b/conf/data/data.config index fa26105..73472e6 100644 --- a/conf/data/data.config +++ b/conf/data/data.config @@ -1,7 +1,18 @@ // If there is any data that needs to be included in the config, it should be placed here using "${params.reference_data_bucket}/path/to/data" params { - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + exomiser_data_profile{ + 'test'{ + data_bundle = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/hg38" + exomiser_phenotype_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/2102_phenotype" + cadd_snvs = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/cadd_snvs" + phenix_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/phenix" + } + 'full'{ + data_bundle = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + } + } + application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" - auto_config_yml = '${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_v2.yml' + auto_config_yml = "${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_V2.yml" } \ No newline at end of file diff --git a/conf/family_test.config b/conf/family_test.config deleted file mode 100644 index 5f2e5af..0000000 --- a/conf/family_test.config +++ /dev/null @@ -1,7 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file.tsv' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/multi_hpo_test.config b/conf/multi_hpo_test.config deleted file mode 100644 index 38e67c3..0000000 --- 
a/conf/multi_hpo_test.config +++ /dev/null @@ -1,7 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file_multi_hpo.tsv' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/single_vcf_test.config b/conf/single_vcf_test.config deleted file mode 100644 index 19eff49..0000000 --- a/conf/single_vcf_test.config +++ /dev/null @@ -1,8 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/single_vcf.tsv' - hpo_terms_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/hpo_terms_file.txt' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/tests/ci/ci_test_data.config b/conf/tests/ci/ci_test_data.config index c0a63f0..f3ecad7 100644 --- a/conf/tests/ci/ci_test_data.config +++ b/conf/tests/ci/ci_test_data.config @@ -1,6 +1,3 @@ params { - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/hg38" - exomiser_phenotype_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/2102_phenotype" - cadd_snvs = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/cadd_snvs" - phenix_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/phenix" + exomiser_profile_files = 'test' } \ No newline at end of file diff --git a/conf/tests/full/test_full.config b/conf/tests/full/test_full.config index db5bc42..ced51fd 100644 --- a/conf/tests/full/test_full.config +++ 
b/conf/tests/full/test_full.config @@ -4,6 +4,6 @@ params { sample_name = 'HG001_NA12878' hpo_terms_file = "${params.reference_data_bucket}/pipelines/exomiser-nf/hpo_terms_file.txt" prioritisers = 'hiPhivePrioritiser' - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" } diff --git a/conf/tests/full/test_full_family.config b/conf/tests/full/test_full_family.config index 5f2e5af..0944699 100644 --- a/conf/tests/full/test_full_family.config +++ b/conf/tests/full/test_full_family.config @@ -1,7 +1,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file.tsv' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/conf/tests/full/test_full_multi_hpo.config b/conf/tests/full/test_full_multi_hpo.config index 38e67c3..d57052b 100644 --- a/conf/tests/full/test_full_multi_hpo.config +++ b/conf/tests/full/test_full_multi_hpo.config @@ -1,7 +1,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file_multi_hpo.tsv' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/conf/tests/full/test_full_single_vcf.config b/conf/tests/full/test_full_single_vcf.config index 19eff49..050c6ad 100644 --- a/conf/tests/full/test_full_single_vcf.config +++ 
b/conf/tests/full/test_full_single_vcf.config @@ -2,7 +2,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/single_vcf.tsv' hpo_terms_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/hpo_terms_file.txt' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/main.nf b/main.nf index a64bb00..f84148f 100644 --- a/main.nf +++ b/main.nf @@ -24,7 +24,7 @@ if(params.hpo_file) log.info "-${c_teal}filename_hpo:${c_reset}- ${params.filena if(params.ped_file) log.info "-${c_teal}filename_ped:${c_reset}- ${params.ped_file}" if(params.families_file) log.info "-${c_teal}families_file:${c_reset}- ${params.families_file}" log.info "-${c_teal}analysis_mode:${c_reset}- ${params.analysis_mode}" -log.info "-${c_teal}exomiser_data:${c_reset}- ${params.exomiser_data}" +log.info "-${c_teal}exomiser_data:${c_reset}- ${params.data_bundle ?: params.exomiser_data_profile[params.exomiser_profile_files]?.data_bundle}" log.info "-${c_teal}exomiser_phenotype_data:${c_reset}- ${params.exomiser_phenotype_data}" log.info "-${c_teal}phenix_data:${c_reset}- ${params.phenix_data}" log.info "-${c_teal}pathogenicity_sources:${c_reset}- ${params.pathogenicity_sources}" @@ -127,7 +127,15 @@ ch_combined = ch_vcf_paths2.join(ch_to_join, by: 0).view() Run containarised Exomiser ---------------------------------------------------*/ -ch_exomiser_data = Channel.fromPath("${params.exomiser_data}") +// Resolve the exomiser reference data: an explicit --data_bundle takes precedence, +// otherwise use the data_bundle of the profile selected by --exomiser_profile_files. +exomiser_data = params.data_bundle ?: params.exomiser_data_profile[params.exomiser_profile_files]?.data_bundle +if (!exomiser_data) { + error "No exomiser reference data: set --data_bundle or set --exomiser_profile_files to 'test' or 'full'" +} +Channel.fromPath("${exomiser_data}") + .set{ch_exomiser_data } + diff --git a/nextflow.config b/nextflow.config index 
2629451..17c863f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,12 +23,13 @@ params { filename_hpo = '' sample_name = null config = 'conf/executors/standard.config' - exomiser_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/hg38' - exomiser_phenotype_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/2102_phenotype' - cadd_snvs = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/cadd_snvs' - phenix_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/phenix' + exomiser_profile_files = 'full' // 'test' for small mock data, 'full' for full data (120GB) + data_bundle = null + exomiser_phenotype_data = null + cadd_snvs = null + phenix_data = null application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" - auto_config_yml = '${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_v2.yml' + auto_config_yml = "${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_V2.yml" hpo_terms_file = false modes_of_inheritance = 'AUTOSOMAL_DOMINANT,AUTOSOMAL_RECESSIVE,X_RECESSIVE,UNDEFINED' prioritisers = 'hiPhivePrioritiser,phivePrioritiser,phenixPrioritiser' @@ -73,7 +74,7 @@ params { } includeConfig 'conf/containers/quay.config' -includeConfig 'conf/data/data.config' // Loads in data +//includeConfig 'conf/data/data.config' // Loads in data profiles { @@ -93,6 +94,7 @@ profiles { } includeConfig 'conf/resources.config' +includeConfig 'conf/data/data.config' process { echo = params.echo