-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathVcf.phase.drifttest.pl
82 lines (77 loc) · 1.68 KB
/
Vcf.phase.drifttest.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#! perl
use warnings;
use strict;
use File::Basename;
my $ref = shift or die "need ref";
my $vcf = shift or die "nedd vcf";
open IN,'<',"$ref";
my %r;
my $c1 = 0;
my $c2 = 0;
my %t;
my %y;
while(<IN>){
chomp;
my @l = split/\t/;
push @{$r{$l[1]}}, $l[0];
$c1 += 1;
$t{$l[0]} = 1;
}
close IN;
(my $vcf_n = basename $vcf) =~ s/\..*//;
my $vcf_d = dirname $vcf;
my $out = "$vcf_d/$vcf_n.data.txt";
my $out2 = "$vcf_d/$vcf_n.atlas.txt";
my @head;push @head, 0..8;
my %h;
open IN,'<',$vcf or die "$!";
while(<IN>){
chomp;
next if /^##/;
if(/^#C/){
my @line = (split/\t/,$_);
for(my $i = 9;$i< @line;$i += 1){
push @head, $line[$i];
}
}else{
$c2 += 1;
my @line = split/\t/,$_;
$y{$c2} = "$line[0]\t$line[1]\n";
D:for(my $i = 9;$i< @line;$i += 1){
next D if !exists $t{$head[$i]};
if(/^\./){
push @{$h{$head[$i]}},"-9";
}else{
my @info = split/:/,$line[$i];
if ($info[0] eq "./."){
push @{$h{$head[$i]}},"-9";
}elsif($info[0] =~ /0[\/\|]1/){
push @{$h{$head[$i]}},"1";
}elsif($info[0] =~ /1[\/\|]0/){
push @{$h{$head[$i]}},"1";
}elsif($info[0] =~ /0[\/\|]0/){
push @{$h{$head[$i]}},"0";
}elsif($info[0] =~ /1[\/\|]1/){
push @{$h{$head[$i]}},"2";
}else{
print "$line[0],$line[1],unexcept genotype\n";
exit;
}
}
}
}
}
close IN;
open O,'>',$out;
print O "$c1\n$c2\n";
for my $k (sort {$a <=> $b} keys %r){
for my $s (@{$r{$k}}){
print O "$k";
print O "\t$_" for @{$h{$s}};
print O "\n";
}
}
print O "\n";
open O2 ,'>',$out2;
print O2 "$_\t$y{$_}" for sort{$a <=>$b} keys %y;
close O2;