-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcheckProteins.pl
executable file
·41 lines (40 loc) · 964 Bytes
/
checkProteins.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/perl -w
use strict;
# Make sure the output directory for the filtered proteomes exists.
# A failed mkdir is fatal: every later write targets this directory, so
# continuing would only produce a cascade of less informative errors.
unless (-d "ProteomesBackup") {
    mkdir "ProteomesBackup"
        or die "Cannot create directory ProteomesBackup: $!";
}
# Collect every proteome FASTA file. glob() is used instead of shelling out
# to `ls`, so file names containing spaces or shell metacharacters are
# returned intact and carry no trailing newline.
my @listoffiles = glob("Proteomes/*_prot.fasta");

# Report, for each proteome, how many FASTA records (header lines) it holds.
print "Checking proteome sizes...\n";
foreach my $file (@listoffiles) {
    # The code printed for a file is the part of its name between
    # "Proteomes/" and "_prot.fasta"; it stays empty if the name does not match.
    my $code = '';
    if ($file =~ /Proteomes\/(.*)_prot\.fasta/) {
        $code = $1;
    }

    # Count header lines directly in Perl rather than via an external grep,
    # so the file name is never interpreted by a shell.
    my $in;
    unless (open($in, '<', $file)) {
        warn "Cannot read $file: $!\n";
        next;
    }
    my $count = 0;
    while (my $line = <$in>) {
        # A FASTA record starts with '>' in the first column.
        $count++ if $line =~ /^>/;
    }
    close $in;

    print "$code - ", $count, "\n";
}
print "Size Checks Complete\n\n";
# For every proteome file, write a filtered copy under ProteomesBackup/ in
# which each record is a header line followed by its sequence joined onto a
# single line. Records whose sequence is shorter than 10 characters are dropped.
foreach my $file (@listoffiles) {
    chomp $file;    # harmless if the list entries carry no trailing newline
    print "Checking $file...\n";

    # The copy mirrors the input's relative path below ProteomesBackup/, so
    # the intermediate directory (e.g. ProteomesBackup/Proteomes) must exist
    # before the file can be opened for writing.
    my $outpath = "ProteomesBackup/$file";
    (my $outdir = $outpath) =~ s{/[^/]*\z}{};
    unless (-d $outdir) {
        mkdir $outdir or die "Cannot create directory $outdir: $!";
    }

    # Slurp the whole input; $/ is localised so the change cannot leak.
    open(my $in, '<', $file) or die "Cannot read $file: $!";
    my $wholefile = do { local $/; <$in> };
    close $in;
    $wholefile = '' unless defined $wholefile;

    open(my $out, '>', $outpath) or die "Cannot write $outpath: $!";

    # Splitting on "\n>" yields one chunk per record. The first chunk keeps
    # its leading '>', all later chunks have lost theirs to the separator.
    my @sequences = split /\n>/, $wholefile;
    foreach my $seq (@sequences) {
        my @lines = split /\n/, $seq;
        my $header = shift @lines;
        next unless defined $header;    # empty chunk: nothing to write

        # Restore the '>' removed by the split. The test is anchored so a
        # '>' elsewhere in the description does not suppress the prefix.
        unless ($header =~ /^>/) {
            $header = ">" . $header;
        }

        # Join wrapped sequence lines into one line and drop short records.
        my $sequence = join("", @lines);
        if (length($sequence) < 10) {
            next;
        }
        print {$out} $header, "\n", $sequence, "\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot finish writing $outpath: $!";
    print "Re-written $file\n";
}
exit;