diff --git a/README.md b/README.md index 6a6a808..b06f3e9 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,13 @@ This is a file needed by exomiser to run. It contains information on where to fi This is a file needed by exomiser to run. It contains placeholders in the text that get filled in by the second process of the pipeline just before running exomiser. The one used for testing can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-nf/auto_config.yml) -### --exomiser_data +### --exomiser_profile_files -This path refers to the reference data bundle needed by exomiser (~120 GB!). A copy of such files can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-data-bundle/) . The reference dataset has been added as a parameter, allowing flexibility to pull the data from any resource (i.e. cloud, local storage, ftp, ...) and Nextflow will automatically take care of fetching the data without having to add anything to the pipeline itself. +This parameter selects which reference data set the pipeline uses. It accepts "test" or "full" (the default is "full"). An explicitly set `--data_bundle` path takes precedence over the selected profile. + +The "full" profile points to the reference data bundle needed by exomiser (~120 GB!). A copy of these files can be found [here](https://lifebit-featured-datasets.s3.eu-west-1.amazonaws.com/pipelines/exomiser-data-bundle/). The reference dataset has been added as a parameter, allowing flexibility to pull the data from any resource (e.g. cloud, local storage, ftp, ...), and Nextflow will automatically take care of fetching the data without having to add anything to the pipeline itself. + +The "test" profile points to a small mock data set used for testing. There are other parameters that can be tweaked to personalize the behaviour of the pipeline. 
These are referenced in `nextflow.config` diff --git a/conf/data/data.config b/conf/data/data.config index fa26105..73472e6 100644 --- a/conf/data/data.config +++ b/conf/data/data.config @@ -1,7 +1,18 @@ // If there is any data that needs to be included in the config, it should be placed here using "${params.reference_data_bucket}/path/to/data" params { - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + exomiser_data_profile{ + 'test'{ + data_bundle = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/hg38" + exomiser_phenotype_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/2102_phenotype" + cadd_snvs = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/cadd_snvs" + phenix_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/phenix" + } + 'full'{ + data_bundle = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + } + } + application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" - auto_config_yml = '${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_v2.yml' + auto_config_yml = "${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_V2.yml" } \ No newline at end of file diff --git a/conf/family_test.config b/conf/family_test.config deleted file mode 100644 index 5f2e5af..0000000 --- a/conf/family_test.config +++ /dev/null @@ -1,7 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file.tsv' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/multi_hpo_test.config b/conf/multi_hpo_test.config deleted file mode 100644 index 38e67c3..0000000 --- 
a/conf/multi_hpo_test.config +++ /dev/null @@ -1,7 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file_multi_hpo.tsv' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/single_vcf_test.config b/conf/single_vcf_test.config deleted file mode 100644 index 19eff49..0000000 --- a/conf/single_vcf_test.config +++ /dev/null @@ -1,8 +0,0 @@ -params { - families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/single_vcf.tsv' - hpo_terms_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/hpo_terms_file.txt' - prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" - application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' - auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' -} diff --git a/conf/tests/ci/ci_test_data.config b/conf/tests/ci/ci_test_data.config index c0a63f0..f3ecad7 100644 --- a/conf/tests/ci/ci_test_data.config +++ b/conf/tests/ci/ci_test_data.config @@ -1,6 +1,3 @@ params { - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/hg38" - exomiser_phenotype_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/2102_phenotype" - cadd_snvs = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/cadd_snvs" - phenix_data = "${params.reference_data_bucket}/pipelines/exomiser/very_fake/phenix" + exomiser_profile_files = 'test' } \ No newline at end of file diff --git a/conf/tests/full/test_full.config b/conf/tests/full/test_full.config index db5bc42..ced51fd 100644 --- a/conf/tests/full/test_full.config +++ 
b/conf/tests/full/test_full.config @@ -4,6 +4,6 @@ params { sample_name = 'HG001_NA12878' hpo_terms_file = "${params.reference_data_bucket}/pipelines/exomiser-nf/hpo_terms_file.txt" prioritisers = 'hiPhivePrioritiser' - exomiser_data = "${params.reference_data_bucket}/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" } diff --git a/conf/tests/full/test_full_family.config b/conf/tests/full/test_full_family.config index 5f2e5af..0944699 100644 --- a/conf/tests/full/test_full_family.config +++ b/conf/tests/full/test_full_family.config @@ -1,7 +1,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file.tsv' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/conf/tests/full/test_full_multi_hpo.config b/conf/tests/full/test_full_multi_hpo.config index 38e67c3..d57052b 100644 --- a/conf/tests/full/test_full_multi_hpo.config +++ b/conf/tests/full/test_full_multi_hpo.config @@ -1,7 +1,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/fam_file_multi_hpo.tsv' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/conf/tests/full/test_full_single_vcf.config b/conf/tests/full/test_full_single_vcf.config index 19eff49..050c6ad 100644 --- a/conf/tests/full/test_full_single_vcf.config +++ 
b/conf/tests/full/test_full_single_vcf.config @@ -2,7 +2,7 @@ params { families_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/single_vcf.tsv' hpo_terms_file = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/hpo_terms_file.txt' prioritisers = 'hiPhivePrioritiser' - exomiser_data = "s3://lifebit-featured-datasets/pipelines/exomiser-data-bundle" + exomiser_profile_files = 'full' application_properties = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/application.properties' auto_config_yml = 's3://lifebit-featured-datasets/pipelines/exomiser-nf/auto_config.yml' } diff --git a/main.nf b/main.nf index a64bb00..f84148f 100644 --- a/main.nf +++ b/main.nf @@ -24,7 +24,7 @@ if(params.hpo_file) log.info "-${c_teal}filename_hpo:${c_reset}- ${params.filena if(params.ped_file) log.info "-${c_teal}filename_ped:${c_reset}- ${params.ped_file}" if(params.families_file) log.info "-${c_teal}families_file:${c_reset}- ${params.families_file}" log.info "-${c_teal}analysis_mode:${c_reset}- ${params.analysis_mode}" -log.info "-${c_teal}exomiser_data:${c_reset}- ${params.exomiser_data}" +log.info "-${c_teal}exomiser_data:${c_reset}- ${params.data_bundle ?: params.exomiser_data_profile?.get(params.exomiser_profile_files)?.data_bundle}" log.info "-${c_teal}exomiser_phenotype_data:${c_reset}- ${params.exomiser_phenotype_data}" log.info "-${c_teal}phenix_data:${c_reset}- ${params.phenix_data}" log.info "-${c_teal}pathogenicity_sources:${c_reset}- ${params.pathogenicity_sources}" @@ -127,7 +127,18 @@ ch_combined = ch_vcf_paths2.join(ch_to_join, by: 0).view() Run containarised Exomiser ---------------------------------------------------*/ -ch_exomiser_data = Channel.fromPath("${params.exomiser_data}") +// Resolve the Exomiser data bundle: an explicit params.data_bundle wins, +// otherwise it is looked up in params.exomiser_data_profile by params.exomiser_profile_files. +exomiser_data = params.data_bundle +if (!exomiser_data && params.exomiser_profile_files) { + selected_data_profile = params.exomiser_data_profile?.get(params.exomiser_profile_files) + if (!selected_data_profile) exit 1, "Unknown exomiser_profile_files '${params.exomiser_profile_files}'. Available profiles: ${params.exomiser_data_profile?.keySet()}" + exomiser_data = selected_data_profile.data_bundle +} +// Fail early instead of building a channel from the literal string "null". +if (!exomiser_data) exit 1, "No Exomiser data bundle configured: set --data_bundle or --exomiser_profile_files" +ch_exomiser_data = Channel.fromPath("${exomiser_data}") + diff --git a/nextflow.config b/nextflow.config index 
2629451..17c863f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,12 +23,13 @@ params { filename_hpo = '' sample_name = null config = 'conf/executors/standard.config' - exomiser_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/hg38' - exomiser_phenotype_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/2102_phenotype' - cadd_snvs = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/cadd_snvs' - phenix_data = 's3://lifebit-featured-datasets/pipelines/exomiser/very_fake/phenix' + exomiser_profile_files = 'full' // 'test' for small mock data, 'full' for full data (120GB) + data_bundle = null + exomiser_phenotype_data = null + cadd_snvs = null + phenix_data = null application_properties = "${params.reference_data_bucket}/pipelines/exomiser-nf/application.properties" - auto_config_yml = '${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_v2.yml' + auto_config_yml = "${params.reference_data_bucket}/pipelines/exomiser-nf/auto_config_V2.yml" hpo_terms_file = false modes_of_inheritance = 'AUTOSOMAL_DOMINANT,AUTOSOMAL_RECESSIVE,X_RECESSIVE,UNDEFINED' prioritisers = 'hiPhivePrioritiser,phivePrioritiser,phenixPrioritiser' @@ -73,7 +74,7 @@ params { } includeConfig 'conf/containers/quay.config' -includeConfig 'conf/data/data.config' // Loads in data +//includeConfig 'conf/data/data.config' // Loads in data profiles { @@ -93,6 +94,7 @@ profiles { } includeConfig 'conf/resources.config' +includeConfig 'conf/data/data.config' process { echo = params.echo