From 9e1b00e42f818309030c06c832ba0e8960c51a17 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 19 Jul 2018 23:49:42 +0100 Subject: [PATCH] Allow user to set max pindel threads, reinstate `-force` to ascat.pl --- CHANGES.md | 5 +++++ Dockerfile | 2 +- Dockstore.cwl | 13 +++++++++++-- scripts/analysisWGS.sh | 10 +++++----- scripts/ds-wrapper.pl | 12 ++++++++++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ad6fb46..72f7c42 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +### 1.1.4 + +* Provide parameter to allow user to lower max pindel threads for analysis proving to have huge memory spikes +* Add `-force` to `ascat.pl` execution, seems to have been lost along the way + ### 1.1.3 * Limit max threads for pindel to prevent memory explosion on noisy datasets diff --git a/Dockerfile b/Dockerfile index b9faa0f..6c0deff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM quay.io/wtsicgp/dockstore-cgpwgs:1.1.2 MAINTAINER keiranmraine@gmail.com LABEL uk.ac.sanger.cgp="Cancer Genome Project, Wellcome Trust Sanger Institute" \ - version="1.1.3" \ + version="1.1.4" \ description="The CGP WGS pipeline for dockstore.org" USER root diff --git a/Dockstore.cwl b/Dockstore.cwl index 703560c..2b0fb28 100644 --- a/Dockstore.cwl +++ b/Dockstore.cwl @@ -22,7 +22,7 @@ dct:creator: requirements: - class: DockerRequirement - dockerPull: "quay.io/wtsicgp/dockstore-cgpwgs:1.1.3" + dockerPull: "quay.io/wtsicgp/dockstore-cgpwgs:1.1.4" hints: - class: ResourceRequirement @@ -130,13 +130,22 @@ inputs: position: 11 separate: true + pindelcpu: + type: int? + doc: "Max cpus for pindel, ignores >8" + default: 8 + inputBinding: + prefix: -pindelcpu + position: 12 + separate: true + cavereads: type: int? 
doc: "Number of reads in a split section for CaVEMan" default: 350000 inputBinding: prefix: -cavereads - position: 12 + position: 13 separate: true outputs: diff --git a/scripts/analysisWGS.sh b/scripts/analysisWGS.sh index 7dd442c..ba712f0 100755 --- a/scripts/analysisWGS.sh +++ b/scripts/analysisWGS.sh @@ -71,10 +71,9 @@ if [ -z ${CPU+x} ]; then CPU=`grep -c ^processor /proc/cpuinfo` fi -PINDEL_CPU=$CPU -if [ $PINDEL_CPU -gt 8 ]; then - PINDEL_CPU=8 -fi +# calculate the min of user defined, host and max we should ever allow for pindel +min_cpu=`echo -e "$PINDEL_MAXCPU\n$CPU\n8" | sort -k1,1n | head -n 1` +PINDEL_CPU=$min_cpu # create area which allows monitoring site to be started, not actively updated until after PRE-EXEC completes #cp -r /opt/wtsi-cgp/site $OUTPUT_DIR/site @@ -252,7 +251,8 @@ do_parallel[ascat]="ascat.pl \ -ra $ASSEMBLY \ -pr $PROTOCOL \ -pl ILLUMINA \ - -c $CPU" + -c $CPU\ + -force" echo -e "\t[Parallel block 3] BRASS_input added..." do_parallel[BRASS_input]="brass.pl -j 4 -k 4 -c $CPU \ diff --git a/scripts/ds-wrapper.pl b/scripts/ds-wrapper.pl index 6979be6..4d3effd 100644 --- a/scripts/ds-wrapper.pl +++ b/scripts/ds-wrapper.pl @@ -12,7 +12,7 @@ pod2usage(-verbose => 1, -exitval => 1) if(@ARGV == 0); # set defaults -my %opts = (); +my %opts = ('pc' => 8,); GetOptions( 'h|help' => \$opts{'h'}, 'm|man' => \$opts{'m'}, @@ -27,7 +27,8 @@ 'sp|species=s' => \$opts{'sp'}, 'as|assembly=s' => \$opts{'as'}, 'sb|skipbb' => \$opts{'sb'}, - 'cr|cavereads=i' => \$opts{'cr'} + 'cr|cavereads=i' => \$opts{'cr'}, + 'pc|pindelcpu:i' => \$opts{'pc'} ) or pod2usage(2); pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); @@ -103,6 +104,7 @@ printf $FH "BAM_MT='%s'\n", $opts{'t'}; printf $FH "BAM_WT='%s'\n", $opts{'n'}; printf $FH "PINDEL_EXCLUDE='%s'\n", $opts{'e'}; +printf $FH "PINDEL_MAXCPU=%d\n", $opts{'pc'}; printf $FH "SPECIES='%s'\n", $opts{'sp'}; printf $FH "ASSEMBLY='%s'\n", $opts{'as'}; printf $FH "CAVESPLIT='%s'\n", $opts{'cr'}; @@ -224,6 
+226,7 @@ =head1 SYNOPSIS -species -sp Species name (may require quoting) -assembly -a Reference assembly -skipbb -sb Skip Battenberg allele counts + -pindelcpu -pc Max CPUs for pindel analysis, >8 ignored [8] Other: -help -h Brief help message. @@ -283,6 +286,11 @@ =head1 OPTION DETAILS Disables the Battenberg allele count generation +=item B<-pindelcpu> + +Maximum number of parallel CPU jobs for pindel (default 8); values above 8 are ignored. Useful if your data causes an extreme memory spike. +Rarely needs setting. + =back =cut