-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathFasta.split.N.pl
94 lines (90 loc) · 2.01 KB
/
Fasta.split.N.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#! perl
use warnings;
use strict;
use Bio::SeqIO;
# Command-line arguments: the input FASTA file, then an optional output
# directory (defaults to the current directory).
my $file = shift;
my $o = shift;
$o //= ".";
if (!-d $o) {
    mkdir $o or die "Cannot create output directory '$o': $!\n";
}
my $seqio_obj = Bio::SeqIO -> new (-file => $file, -format => "fasta", -alphabet => "dna");

# For every record, write each maximal run of non-N characters to its own
# file "$o/<id>-<index>.fa". The FASTA header of each piece is
# "<id>_<start>_<end>", where <start> is the 0-based offset of the piece in
# the upper-cased sequence and <end> is the offset just past its last base.
while (my $seq_obj = $seqio_obj->next_seq) {
    my $id  = $seq_obj->display_id;
    my $seq = uc($seq_obj->seq // '');

    # Index of the next piece written for this record.
    my $c = 0;

    # A single left-to-right scan: every character other than N belongs to
    # a piece, so ambiguity codes (R, Y, ...) stay where they are instead of
    # desynchronising the pieces and their coordinates.
    while ($seq =~ /([^N]+)/g) {
        # Copy the match data before any other operation can disturb it.
        my $se = $1;
        my ($s, $e) = ($-[1], $+[1]);

        my $path = "$o/$id-$c.fa";
        open my $out, '>', $path
            or die "Cannot open '$path' for writing: $!\n";
        print {$out} ">${id}_${s}_${e}\n$se\n";
        # Buffered write errors only surface at close, so check it.
        close $out or die "Cannot close '$path': $!\n";

        print STDERR "$id$c\t".length($se)."\n";
        $c += 1;
    }
}
# array_creat(\@chars, $mark)
#
# Groups a list of single characters into alternating runs of "N" and
# non-"N" characters and returns the runs as a list of strings.
#
#   \@chars - reference to an array of one-character strings
#   $mark   - expected to be the first character of the sequence; when it is
#             "N" the even slots of the result hold the N runs, otherwise
#             the even slots hold the non-N runs
#
# Returns the list of runs in input order (an empty list for empty input).
# If $mark does not agree with the class of the first character, slot 0 of
# the result is left undefined.
sub array_creat{
    my ($ref, $mark) = @_;
    my @chars = @{$ref};
    my @runs;
    return @runs if !@chars;

    my $n_first = defined $mark && $mark eq "N";
    # True when a character belongs to the class that occupies even slots.
    my $is_primary = sub {
        my ($ch) = @_;
        return $n_first ? $ch eq "N" : $ch ne "N";
    };

    my $slot = 0;
    # Visit every element, including the last one; only the look-ahead at
    # the next element needs a bounds guard.
    for my $i (0 .. $#chars) {
        if ($is_primary->($chars[$i])) {
            $runs[$slot] .= $chars[$i];
            next;
        }
        $runs[$slot + 1] .= $chars[$i];
        # A secondary run ends when the next character is primary again;
        # move on to the next pair of slots.
        if ($i < $#chars && $is_primary->($chars[$i + 1])) {
            $slot += 2;
        }
    }
    return @runs;
}