Skip to content

Commit

Permalink
Merge branch 'postreleasefix/107' into release/107
Browse files Browse the repository at this point in the history
  • Loading branch information
jamie-m-a committed Jun 20, 2022
2 parents 392d377 + 8e7519a commit 5253c54
Show file tree
Hide file tree
Showing 37 changed files with 778,981 additions and 58 deletions.
24 changes: 22 additions & 2 deletions modules/Bio/EnsEMBL/VEP/AnnotationSourceAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ use Bio::EnsEMBL::VEP::AnnotationSource::Database::Variation;
use Bio::EnsEMBL::VEP::AnnotationSource::Database::StructuralVariation;
use Bio::EnsEMBL::VEP::AnnotationSource::File;

use LWP::Simple;

=head2 get_all
Expand Down Expand Up @@ -224,11 +225,30 @@ sub get_all_custom {
}

$opts->{fields} = \@fields if @fields;

if (grep { /\#\#\#CHR\#\#\#/ } $file){

my @valid_chromosomes = keys %{$self->chr_lengths} > 0 ? sort keys %{$self->chr_lengths}: ((1..22), qw(X Y MT));

foreach my $chr (@valid_chromosomes){
print $chr."\n";
my $new_file = $file;
my $new_opts = { %$opts };
$new_file =~ s/\#\#\#CHR\#\#\#/$chr/;
next unless ( -e $new_file || head($new_file) );
$new_opts->{file} = $new_file;
push @as, Bio::EnsEMBL::VEP::AnnotationSource::File->new($new_opts);
}

# Non-match ###CHR### pattern scenario
die "Error: No files with pattern $file were found\n" unless @as;

push @as, Bio::EnsEMBL::VEP::AnnotationSource::File->new($opts);
} else {
push @as, Bio::EnsEMBL::VEP::AnnotationSource::File->new($opts);
}
}

return \@as;
}

1;
1;
47 changes: 41 additions & 6 deletions modules/Bio/EnsEMBL/VEP/OutputFactory.pm
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Variation::Utils::Constants;
use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
use Bio::EnsEMBL::VEP::Utils qw(format_coords merge_arrays);
use Bio::EnsEMBL::VEP::Utils qw(format_coords merge_arrays get_flatten);
use Bio::EnsEMBL::VEP::Constants;

use Bio::EnsEMBL::VEP::OutputFactory::VEP_output;
Expand Down Expand Up @@ -2283,20 +2283,37 @@ sub get_custom_headers {
my @headers;

foreach my $custom(@{$self->header_info->{custom_info} || []}) {
push @headers, [$custom->{short_name}, sprintf("%s (%s)", $custom->{file}, $custom->{type})];

foreach my $field(@{$custom->{fields} || []}) {
my @flatten_header = get_flatten(\@headers);
my %pos = map { $flatten_header[$_]=~/o/?($flatten_header[$_]=>$_):() } 0..$#flatten_header if @flatten_header;
if (grep { /^$custom->{short_name}$/ } @flatten_header){
my $pos = $pos{$custom->{short_name}} / 2;
$headers[$pos][1] .= ",$custom->{file}";
} else {
push @headers, [
sprintf("%s_%s", $custom->{short_name}, $field),
sprintf("%s field from %s", $field, $custom->{file})
$custom->{short_name},
sprintf("%s", $custom->{file})
];
}

foreach my $field(@{$custom->{fields} || []}) {
my $sub_id = sprintf("%s_%s", $custom->{short_name}, $field);
if (grep { /^$sub_id$/ } @flatten_header){
my $pos = $pos{$sub_id} / 2;
$headers[$pos][1] .= ",$custom->{file}";
} else {
push @headers, [
$sub_id,
sprintf("%s field from %s", $field, $custom->{file})
];
}
}
}

return \@headers;
}


=head2 flag_fields
Example : $fields = $of->flag_fields();
Expand Down Expand Up @@ -2332,4 +2349,22 @@ sub flag_fields {
return \@return;
}


=head2 get_full_command

  Example    : $command = $of->get_full_command();
  Description: Get the full command line stored under the full_command
               key of the raw config. Returns an empty string when no
               command (or a false value) has been stored.
  Returntype : string
  Exceptions : none
  Caller     : headers() in child classes
  Status     : Stable

=cut

sub get_full_command {
  my $self = shift;

  # Walk the nested hashes one level at a time so that merely reading the
  # command never autovivifies _config or _raw_config on the object.
  my $config = $self->{_config} or return "";
  my $raw_config = $config->{_raw_config} or return "";

  return $raw_config->{full_command} || "";
}

1;
2 changes: 2 additions & 0 deletions modules/Bio/EnsEMBL/VEP/OutputFactory/BaseTab.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ sub headers {

push @headers, @{$self->description_headers};

push @headers, sprintf("## VEP command-line: %s", $self->get_full_command);

push @headers, $self->column_header;

return \@headers;
Expand Down
2 changes: 2 additions & 0 deletions modules/Bio/EnsEMBL/VEP/OutputFactory/VCF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ sub headers {
# custom headers
push @headers, map {sprintf('##INFO=<ID=%s,Number=.,Type=String,Description="%s">', $_->[0], $_->[1])} @{$self->get_custom_headers};

push @headers, sprintf("##VEP-command-line='%s'", $self->get_full_command);

push @headers, $col_heading;

return \@headers;
Expand Down
8 changes: 8 additions & 0 deletions modules/Bio/EnsEMBL/VEP/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ sub validate_vf {
}

my $ref_allele = shift @alleles;
my $alt_allele = $alleles[-1];

if($ref_allele =~ /^[ACGT]*$/ && ($vf->{end} - $vf->{start}) + 1 != length($ref_allele)) {
$self->warning_msg(
Expand Down Expand Up @@ -653,6 +654,13 @@ sub validate_vf {
$ok = (uc($slice_ref_allele) eq uc($ref_allele) ? 1 : 0);
}
}
if (($ref_allele eq $alt_allele) && ($ref_allele ne defined($slice_ref_allele)) ){
$ok = 0;
}

if (!$alt_allele){
$ok = 0;
}

if(!$ok) {
$vf->{check_ref_failed} = 1;
Expand Down
18 changes: 18 additions & 0 deletions modules/Bio/EnsEMBL/VEP/Utils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use Bio::EnsEMBL::VEP::Utils qw(
convert_arrayref
merge_hashes
merge_arrays
get_flatten
);
# 5-10
Expand Down Expand Up @@ -94,6 +95,7 @@ use vars qw(@ISA @EXPORT_OK);
&get_compressed_filehandle
&get_version_data
&get_version_string
&get_flatten
);

our ($CAN_USE_PERLIO_GZIP, $CAN_USE_GZIP, $CAN_USE_IO_UNCOMPRESS);
Expand Down Expand Up @@ -553,3 +555,19 @@ sub get_version_string {

1;

=head2 get_flatten

  Arg 1      : list of values; any value may be an arrayref nested to
               any depth
  Example    : @flatten_array = get_flatten([1, [2, [3]]], 4); # (1, 2, 3, 4)
  Description: Recursive function to get a unidimensional array from any
               multidimensional array. Array references (blessed or not)
               are expanded in place; every other value, including
               hash, code and scalar references, is returned unchanged.
  Returntype : array
  Exceptions : none
  Caller     : get_custom_headers() in OutputFactory
  Status     : Stable

=cut

sub get_flatten {
  # Loaded here so the sub does not depend on the file's import block.
  require Scalar::Util;

  # Only array-backed references are expanded; dereferencing any other
  # kind of reference with @{} would die with "Not an ARRAY reference".
  return map {
    ref($_) && Scalar::Util::reftype($_) eq 'ARRAY' ? get_flatten(@{$_}) : $_
  } @_;
}
49 changes: 48 additions & 1 deletion t/AnnotationSourceAdaptor.t
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ SKIP: {
no warnings 'once';

## REMEMBER TO UPDATE THIS SKIP NUMBER IF YOU ADD MORE TESTS!!!!
skip 'Bio::DB::HTS::Tabix module not available', 4 unless $Bio::EnsEMBL::VEP::AnnotationSource::File::CAN_USE_TABIX_PM;
skip 'Bio::DB::HTS::Tabix module not available', 5 unless $Bio::EnsEMBL::VEP::AnnotationSource::File::CAN_USE_TABIX_PM;

$asa->param('custom', [$test_cfg->{custom_vcf}.',test,vcf,exact']);
is_deeply(
Expand Down Expand Up @@ -209,6 +209,53 @@ SKIP: {
],
'get_all_custom - fields'
);

my $test = $test_cfg->{test_vcf_MT};
$test =~ s/MT.vcf.gz/\#\#\#CHR\#\#\#\.vcf.gz/;
$asa->param('custom', [$test.',test,vcf,overlap,1,FOO,BAR']);
is_deeply(
$asa->get_all_custom(),
[
bless( {
'info' => {
'custom_info' => {
'fields' => ['FOO', 'BAR'],
'short_name' => 'test',
'report_coords' => '1',
'file' => $test_cfg->{test_vcf21},
'type' => 'overlap'
}
},
'short_name' => 'test',
'_config' => $asa->config,
'report_coords' => '1',
'file' => $test_cfg->{test_vcf21},
'fields' => ['FOO', 'BAR'],
'custom_multi_allelic' => undef,
'type' => 'overlap'
}, 'Bio::EnsEMBL::VEP::AnnotationSource::File::VCF' ),
bless( {
'info' => {
'custom_info' => {
'fields' => ['FOO', 'BAR'],
'short_name' => 'test',
'report_coords' => '1',
'file' => $test_cfg->{test_vcf_MT},
'type' => 'overlap'
}
},
'short_name' => 'test',
'_config' => $asa->config,
'report_coords' => '1',
'file' => $test_cfg->{test_vcf_MT},
'fields' => ['FOO', 'BAR'],
'custom_multi_allelic' => undef,
'type' => 'overlap'
}, 'Bio::EnsEMBL::VEP::AnnotationSource::File::VCF' )
],
'get_all_custom - automated ###CHR###'
);

}

$asa->param('custom', []);
Expand Down
31 changes: 30 additions & 1 deletion t/OutputFactory.t
Original file line number Diff line number Diff line change
Expand Up @@ -1813,14 +1813,43 @@ SKIP: {
[
[
'test',
$test_cfg->{custom_vcf}.' (overlap)'
$test_cfg->{custom_vcf}
]
],
'get_custom_headers'
);
}


SKIP: {
no warnings 'once';

## REMEMBER TO UPDATE THIS SKIP NUMBER IF YOU ADD MORE TESTS!!!!
skip 'Bio::DB::HTS::Tabix module not available', 1 unless $Bio::EnsEMBL::VEP::AnnotationSource::File::CAN_USE_TABIX_PM;

$runner = get_annotated_buffer_runner({
input_file => $test_cfg->create_input_file([qw(21 25606454 test G C . . .)]),
custom => [$test_cfg->{custom_vcf}.',test,vcf', $test_cfg->{custom_vcf_2}.',test,vcf'],
quiet => 1,
warning_file => 'STDERR',
});

$of = $runner->get_OutputFactory();
$ib = $runner->get_InputBuffer();

is_deeply(
$of->get_custom_headers,
[
[
'test',
$test_cfg->{custom_vcf} . ',' . $test_cfg->{custom_vcf_2}
]
],
'get_multiple_custom_headers'
);
}


## plugins
##########

Expand Down
5 changes: 3 additions & 2 deletions t/OutputFactory_Tab.t
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ is_deeply(
'## DISTANCE : Shortest distance from variant to transcript',
'## STRAND : Strand of the feature (1/-1)',
'## FLAGS : Transcript quality flags',
'## custom_test : test.vcf.gz (overlap)',
'## custom_test : test.vcf.gz',
'## VEP command-line: ',
"#Uploaded_variation\tLocation\tAllele\tGene\tFeature\tFeature_type\tConsequence\tcDNA_position\tCDS_position\tProtein_position\tAmino_acids\tCodons\tExisting_variation\tIMPACT\tDISTANCE\tSTRAND\tFLAGS\tcustom_test"
],
'headers'
Expand All @@ -118,7 +119,7 @@ my $runner = get_annotated_buffer_runner({
});
is(
$runner->get_OutputFactory->headers->[-2].$runner->get_OutputFactory->headers->[-1],
"## test : header".
"## VEP command-line: ".
"#Uploaded_variation\tLocation\tAllele\tGene\tFeature\tFeature_type\tConsequence\tcDNA_position\tCDS_position\tProtein_position\tAmino_acids\tCodons\tExisting_variation\tREF_ALLELE\tIMPACT\tDISTANCE\tSTRAND\tFLAGS\ttest",
'headers - plugin'
);
Expand Down
11 changes: 6 additions & 5 deletions t/OutputFactory_VCF.t
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@ is_deeply(
'##fileformat=VCFv4.1',
'##VEP="v1" time="test"',
'##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|custom_test">',
'##INFO=<ID=custom_test,Number=.,Type=String,Description="test.vcf.gz (overlap)">',
'##INFO=<ID=custom_test,Number=.,Type=String,Description="test.vcf.gz">',
"##VEP-command-line=''",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
],
'headers'
);

my $headers = get_runner({plugin => ['TestPlugin'], quiet => 1, input_file => $test_cfg->{test_vcf}, vcf => 1})->get_OutputFactory->headers;
is_deeply(
[$headers->[-3], $headers->[-2], $headers->[-1]],
[$headers->[-4], $headers->[-3], $headers->[-1]],
[
'##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|test">',
'##test=header',
Expand All @@ -72,7 +73,7 @@ is_deeply(

$headers = get_runner({refseq => 1, fasta => $test_cfg->{fasta}, quiet => 1, input_file => $test_cfg->{test_vcf}, vcf => 1})->get_OutputFactory->headers;
is(
$headers->[-2],
$headers->[-3],
'##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|REFSEQ_MATCH|REFSEQ_OFFSET|GIVEN_REF|USED_REF|BAM_EDIT">',
'headers - BAM_EDIT'
);
Expand Down Expand Up @@ -517,7 +518,7 @@ my $runner2 = get_runner({
$of = $runner2->get_OutputFactory;

is_deeply(
[map {$of->headers->[$_]} (0,2,3)],
[map {$of->headers->[$_]} (0,2,4)],
[
'##fileformat=VCFv4.1',
'##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID">',
Expand Down Expand Up @@ -617,7 +618,7 @@ is_deeply(


is(
$of->headers->[-2],
$of->headers->[-3],
'##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID">',
'headers - from input 2'
);
Expand Down
5 changes: 3 additions & 2 deletions t/OutputFactory_VEP_output.t
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ is_deeply(
'## DISTANCE : Shortest distance from variant to transcript',
'## STRAND : Strand of the feature (1/-1)',
'## FLAGS : Transcript quality flags',
'## custom_test : test.vcf.gz (overlap)',
'## custom_test : test.vcf.gz',
'## VEP command-line: ',
"#Uploaded_variation\tLocation\tAllele\tGene\tFeature\tFeature_type\tConsequence\tcDNA_position\tCDS_position\tProtein_position\tAmino_acids\tCodons\tExisting_variation\tExtra"
],
'headers'
Expand All @@ -113,7 +114,7 @@ my $runner = get_annotated_buffer_runner({
});
is(
$runner->get_OutputFactory->headers->[-2].$runner->get_OutputFactory->headers->[-1],
"## test : header".
"## VEP command-line: ".
"#Uploaded_variation\tLocation\tAllele\tGene\tFeature\tFeature_type\tConsequence\tcDNA_position\tCDS_position\tProtein_position\tAmino_acids\tCodons\tExisting_variation\tExtra",
'headers - plugin'
);
Expand Down
4 changes: 4 additions & 0 deletions t/Parser.t
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ ok($tmp =~ /Specified reference allele.+does not match Ensembl reference allele/
$vf = get_vf({allele_string => 'CTT/TCC', chr => 21, start => 25585733, end => 25585735});
is($p->validate_vf($vf), 1, 'validate_vf - check_ref long ok');

$vf = get_vf({allele_string => 'TTT/T', chr => 21, start => 25585733, end => 25585735});
is($p->validate_vf($vf), 0, 'validate_vf - check_ref_fail_issue');
ok($tmp =~ /Specified reference allele.+does not match Ensembl reference allele/, 'validate_vf - check_ref fail msg');

$p->{check_ref} = 0;

# lookup_ref
Expand Down
Loading

0 comments on commit 5253c54

Please sign in to comment.