-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrosalind_GC_content.py
54 lines (41 loc) · 8.44 KB
/
rosalind_GC_content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def GCcontent(seq):
    """Return the percentage of characters in ``seq`` that are ``C`` or ``G``.

    Only uppercase ``C`` and ``G`` are counted; any other character
    (including lowercase bases) adds to the length but not to the tally.

    Args:
        seq: The sequence to inspect, typically a string of bases.

    Returns:
        The GC percentage as a float between 0.0 and 100.0.  An empty
        sequence yields 0.0.
    """
    length = len(seq)
    if not length:
        # Nothing to measure; guard against dividing by zero.
        return 0.0
    gc_count = seq.count("C") + seq.count("G")
    # 100.0 keeps the division floating-point on Python 2 as well.
    return 100.0 * gc_count / length
def get_winner(list_of_seqs):
    """Find the entry with the highest GC content.

    Args:
        list_of_seqs: Iterable of entries where index 0 is the sequence ID
            and index 1 is the sequence string (the shape produced by
            ``make_list_of_seqs``).

    Returns:
        A ``(winning_id, winning_score)`` tuple, where the score is the
        percentage computed by ``GCcontent``.  On a tie the earliest entry
        wins.  For an empty ``list_of_seqs`` the result is ``('', 0)``.
    """
    winning_id = ''
    winning_score = 0
    have_winner = False
    for entry in list_of_seqs:
        this_id = entry[0]
        # Leading whitespace is not part of the sequence and would otherwise
        # inflate the length used for the percentage.
        this_score = GCcontent(entry[1].lstrip())
        # The first entry always becomes the provisional winner; without this
        # an input whose sequences all score 0% would return an empty ID.
        if not have_winner or this_score > winning_score:
            winning_id = this_id
            winning_score = this_score
            have_winner = True
    return winning_id, winning_score
def make_list_of_seqs(string):
    """Split whitespace-separated FASTA-like text into ``[id, sequence]`` pairs.

    The text is expected to alternate between an ID token and a sequence
    token, e.g. ``'>Rosalind_1 ACGT >Rosalind_2 GGCC'``.  Each sequence must
    be a single whitespace-free token; a sequence wrapped over several lines
    is not re-joined.

    Args:
        string: The raw text to parse.

    Returns:
        A list of two-item lists ``[seq_id, sequence]`` in input order, with
        any leading ``>`` characters stripped from the ID.  A trailing token
        that has no partner is ignored.
    """
    tokens = string.split()
    # Even positions hold IDs and odd positions hold sequences.  zip() stops
    # at the shorter slice, so a dangling final token is dropped on every
    # Python version instead of depending on integer division of the length.
    return [
        [seq_id.lstrip('>'), sequence]
        for seq_id, sequence in zip(tokens[0::2], tokens[1::2])
    ]
big_string_to_check = '>Rosalind_2471 TAAAGGATCGTAGCTACGCCCAAAGACTTGAGTTGTGTTTTGGCGACACACGGCTCCGCCCTGAACGATACGTGAAGATCGAATCTTTCCCAGGAACATAGAGTGGGATGAATAGAGGTAAGCTTGTGCTATCGTACCAGATATAGGCGGCTTGACTGGTTCATCGTTTTCTTGGTCTAAGGCCGGGGTTGAGGGTGCTGTACGTGCGACGTTCGTCAGCCCTTACTCGGTGTCTTTTTGGCGGAATTATCATGCGCACGATGAGACCCTCTTGGAAACATCTTCCCAAGATCCCTTAAAGACGGCTACGTGACAGTGTCCGTCCCTGGCCTTTGTAAAAATTACGACACGGTTTACTTGAGTTTTATCCTGGAAGGAGTTCTGCATGCCTGGAAGAGTCCCCCTACCGCTAATGACAATTCGGGACGACCTCGGGCATACGAGCCCGTACTACCCAAGTTTCGTCATATCTCTAAGGGCCTTATGTTGATACACAGAGGAAGGGTTTCTCCGCGTTCGAACCCGACTAGCCTAAAGGCGGGTGGTCTTCCGAAACTTAATAGCTGACAATACGCCCACCGATAGATGCTTCTGTTGTAAGGCGGGTCGGCCAGGTCGTAGTACGGGCTAGGTTGGGATATGTGTATCTGTGGACTATCTGACGGTGATAGTCTCCAATACGAATTCACTAACGACCGTGCTCGGGCTCCAAAGCGATCTAACATTTATCTATAGTGTTCTCACATTGAGCTGTTAGAAGGGTTTGTGCTTCAAAACCTAGTTTCCAGAAGAAAATCCGGTACCCAAACGATGCGGCAAAGCGTACATCAAGATCATCACTTCGAGAACAAGTTAACGATCGGTGAGAAATCTGATTAGAATGGGCGCAATGACGGCGTTAGTTCGTGGAGTCGGTGGAACGGACCATAAGACTGTACA >Rosalind_5788 CACGACCGTGTTGAGTAGACTTAACAGGGTAATTGGATCCCGGGAACACGTCAGAGAGGGTGAAACCTAGACACAGCACGGAAGCATCCCCTTGTTATAAGCCAGCGCTTCACACACCATAGTATTCACCGTGAGCATATTTACTGTAGCCTTTACTCCAGGAACTGATGGGGTAAGCCCTAAACGTCACATATTAACAGCCAGAATTCCACCACTACTTTCGAGTTCTTCAGAGTGTCCTATACTGCCTTCAGAGATCCACGTCATATCGATGGTTTACAATCGGCTAACTTTTCGTAGGATGGCTAACCCACCACGCGTCGCTGAGAGAGTGGGAAGGAGGATCCTCTTCGACCTCACGGCAGCGGAACCCAGACAGCGTGACCCGGATAGGCGTACCCCACCTCCCTCCTCGCTGGGGATGCTATCTCATTTCCGCTCGCTCACTAAGGTCGCTCCTACATCACTCACTAGACGTCTCCCGATACCATAGCTGAGGCACATAAGACTCGATACACTATAAATTTGGTGTCGTTAGCTTCAGACATGGGCTTTACAAATACAGACACTTGTCGAACCATTTGCAGGAGGGTAACGTACATCGCTTTCGCTGAGGACATTGGAGGACATCTGCCAATCGGACATGCCCAACGTGTAATGGGGGCATGCACATGAGTTCGTAGCGCGCAGCCACACATCGTTCAACCGCATGGGCTTTAAATCGTCACACATTCGACTCCGAGTGTGGCATGGCAGCGAGCACGGTTTTCGAGGCTGCACAAAGTGAAGATTGCTTATTCTCCTGATGATTCGT >Rosalind_4093 
GTTAGCGTTGTAGGGATTGTGAGGGAGAAGCCCTCGGGGCTAGTATCTCCGTTTAAATTTACGTGCACCGTTTTTTCTTACGAGTCTATGCTCACCACCAACGGTCGAAGGCTCCACGTCTGCTGTGGACTTCCTAGCCTGTCAATGTTACAGAGGCCTAAGATGGCTGCGATCCAGAAACTACTGTGACGTCGATTTAGACCCCTAGGCGCCCCTTGTAGTCTAGTTGAAAAATTATTCGACTTCGCTAGATTGTCGTCGTTAAGAAGGAGGTAGAGGGATCGGGGGGCGGTGGTTGTGCTTACGAGTCGCATAGAGGCCTTGTCCTGCGGGATTTACGACTTCTAGCGTGCCAGGCAATAGAACGTAGGGTATCGGTTTACAAGAAAGGTCTATTGCTTACAAGCGGAACGCCTGTTCAAAAAGGTCAAGGAACCAGACAGGCACCTCTTCGAGGCTGTATCTAATCGTTGGTTCTCATAAAAACTTGTTAGGTACGCAAGGAGATCCAAGCGAAGGCAACATTTAAACAGAGGTTTCGGATTGAATCACGGACTAGATTGCCATGATTTGCGTTGGCGAGAAGGAATTAATATTCCGGTGTTACGTAGGATCATCGTGACCCAAGACAGCGCCCCATGGTCGGACTCATTCCGCCGCAGTGTTGTCCATCAGCTGTCTTGGACTCATGGCGTAAGATTATTTCTCTGCAAGCGCCAGTAAGAATAGCTCACTTTGAGACGGTCGTTTCGTGCGACGGATCTCCGGCGAGCCGCTTAGGAAATGGACATCTCCACCAACCGGAATATTAGGGACCAGCTCTCCTCGCCTTCTACGAGATACCAGGTCGGCGGTCTGAAAATTTATATGGACAGATAATGAGCTTTGCGGTCCAGCTGGCTTAGAAAATCGTCTTTCAAACCTAGTTAAGTCGACGTCTTCGCTTTCACAGAATTGACCATCC >Rosalind_4486 CATCCTGATGTATAAGAATTGGATGGCCACTATCTTTTACCGGTGTACAGAGCATCGTCAAGAACAGATTCCATCGGCTTAGTTCCCCGCCAACCTGAATTGAATCGTTCATTGTGCTATAACTTGACGCCCTACTGACTCCAATTACGATTCCACGCAAATTAAATCCCCATACACTTGTTAAGAGCTCATCAACCACATCCCAACAACCGCGACTACACTCTTTTAGCTGAATGAGGCATGCACCGAATGTCGGGACGATCCAGGGCCAGGGGATGTTTTGGCTAACGGTTAAACTGGGCAAGTAATCTGCATGGTTTAGCGCCCAACGACCAAGTTTTGTGTCTGTTACAATATGCAAGACCGTTCCCGCCTCTTCGAATAATCTATGCATGCCTGTGACCGCCTAATTAGAGCATGGTGAGTAGATACCGGGTGTTCTGCGTTAGTCTTCGCACGAGGAGAGCTCCGGCTCCCCTCACCGTCCAAATCCCGTACTTATTCTTTTAGCATTTCCGAGGTTATGACCGCCGATTTCTTCGCGCAGACCCATCCAGCAATATCGCGCAGGGGACCGAGCGATTATGGCGGTGTCCATGCGTTCACGAATCTGAGCAAGTGCGACCATACCGAGTACGCAGTCCGAAGCTTCTCTACTTAAATTTCTTCCACTAGGCTGGAGCCCAACCGACGTGTGCAACATCCTTCTGTGGCGCGCGGGTGTGTGTGTGAGTCGTGGGCTACCGTTACCCTCCCCTGCCAAGTTCCTAGGTCTGGAGCGTTAATTGGCTGATAGTAGCTGAAACAGAACTGGGGCTCCTGTA >Rosalind_6754 
ATCAACAGACTGCACGCAATCGCGCTACATAGCCGGTCGGTTGCATATAACTGTCGGTTGGCTCTCGCAAGTGCCAGATCGGCACGGATGAAGCCCCCTCCGCGGGGTAGTTGCTCCACTTGTATCCTACCTACGAATAGACTCGATAATAGGGAATAGCGCCTTATACGACGAGGTTCTGTAGTCCACATGGTCCTGCTTCAGATACTCGCACTTCGCAAATTTGGGACCATGCTAGATGCTCGAAAAAGACATATGTCCCCCATGTCTGGTGCAGCTTTTATCAACGCACGATTTGCATTATAGCGGGGGCGGGGGTTTCCCCTAGCTAGTATTCTCCTCCACATGAGCCTTTTCGATGCAGTAGCCCCTTATATCCTCCATATAGGAGGCATCTCTCTCTGAAAGAGGAGGGTTAAATACTCCGGCCGCTTAGAGTCACCGTGCAATCGTTCAAATTGTAATTGCCTTGCAGCTGCGGTAGGTCGCTTTACTGCCTCTTCGTTACCCGTTCTTGGACCTGTCCCGCTATTGGAGCCAAGAAGGTCCTAACATCGAATGCCGAGACCCACTATCGGGCGTTGGTAGGCCGTACGTCCTCGATGTAATGTAACGACGAGTCTGCACTAAAGTGTGTCTTTTTTGATAGATAATAATGCACGTCCGAGGTACCCACCAACGGCAGGATCCACCACAGTCAAAAACTGTATCGAGCGCAGTGGTAAGCCATGTCACGACGAAATACCGTTAAGTTTCCATAACGTATGTTTAAACGTTCCGCTACCCCCTCTTTCCACCCTGCGTAAAATCCAGTCTGTGGGTACGTTTCGATTTCAGCGCCTCATACGGTGTCGTTATTTATCTCTCTAAGAAGGGCCTTCTGCGGGGTGGTAACAGTGGCTATACC >Rosalind_9760 TGACCGAGTGGAGGTTCCAGTCCTTCTAGTGCTACCCATCGATTGAGCATCATACCACTAATCAGGGTCGGGACCAGGCCCAAACTCGTTCGGCGGTCGTACTCCGGCAGCATCCAGTGGATCTAGAGAGAGACAGTCCGCTGATGCACGCACGGGTCCCCGGCCATCACATGAGAATCACGGACGCAGTCGATGGTGGGCTGCAAACCTCTGCGCCAGCTATCGTGTGCAGAGTTGTCGAACGTTCTGTGTCCAGTCTCCCGGCTAGTCTTGAATACAGATCTAACATTATATTCTAGCGGCCTTGGGCGGCACTATGAGCAGCGGCGCCACAGGTAAAATCCGGAATTAGGTGTCTCTGGAGTCTCATAACCTTTCTGAATGGGAATAACCTTGCGGTGGCGCGTGCCCTAACGGGAAACTGCCGACTGTGCTCTGCGTGAGGAAACCCCGTAGAAAGCTGCGAGGGATCTTCCCCTTGTGCCGTTGGTAGAGCTCGATTTTATCTAAGTAGACCGCAACTGGCATCCGCACAATAGACAACTCTCTTGAAGGCGTAGTTCCTGGGATAGACCAGTCTAGGATGCACGGCGTCCGACATGGAACCTGGACCGCGTGACCGGGAAGTCCCATAGCAGCCCTAGACAGACACACGAATCCAGACTAGTAGCCTCGTTGCTGTGCATTCTCGAGTACAGTCTGTCGGATAATATGCCAGCTGACATCTCCGATCCTAGCGCCGAACGGACCCAATTCAGGGGCTGGGAGCGATTAGCGGAAATATCTCCTCCACTCTGTGTCAAACCCGATCAAACAAGTATTCACGTGGCTACGTATGACTGAGTGACCATTACTTTGAAAATAACAACAATGGCTCACTTGATTATGGCCATTGTTATTAAGGTCGTGTTCTAAACCCTTCCTGTTTTATCGCTACTGAAACATC >Rosalind_8295 
GATGTGCCTTTGTGACATACCTAATCTCGGCCTGACAATGCCGGGTCTCAAGAGAGGGACATTGGGATACCTTGCGCCCGTAAGTATAGGATACAAACGAGCACACAGCTAGCACTCCCTGCTAAAGCTACCGTAGTCAGGAAACACGAAAGCATACGCTTTGTTTAGCGAGGTGATGAAGTTTCGTAATTCGTGTAATCATGACCCGCATCTGCACGCTTATTGGGCGCACTAAAATGACGGCACAATGAAAGCAATTACAGTGTGAAACAGTCGGACATGCTAAAAGCGCAGTTGGTACATCTCGTCTTGCTGCTTAGTTGTGCCTCCAAATCAAGAGAATGCAGATCAGAAGTCCCGGGAAAGCAAAGGCCACAGCAAGTGCAACCGTGGCAACCGACCTACTTCAAGGTAGCGTTCTGTTTATGTGGCGGGAGAGGTGTTTTACGACTATTGTGCTATTCGGGTGCATTCGCTTTACTCTCACAAAGTAGTCGTCGGCAGAAGCCTGTAGCCCCTTTGCTAGAGCGATATCTCGATCGGAGACATAACCACATACACACGTGGATTAATAAAGAACGGTTCGCCAGCCTCTATGGGTTAGTACCATTTCGTTACTAGGCCCTGGCAAAAGGGACACTCACGATAGGCAAATTTCCGGGTGGTTTAGTCCATTCCGACGGCGCTGATCAAGCGGGCCAACCGTCGACACCTACACAGGTGGATGGCGTTAAAAGATGTCTTCGATGCAATTGACCTGCACATTTTGCACGAAGGCAACGTGAGACAAAACTCACACTCCGTTTCCCGGGGGGAAGCTGTGGGCCGCGTGACACGTGGGACAAGAGCATCTTTCCGTACGCCCAGGCATGTGCGAACCGCG >Rosalind_0051 ACTTTCACTGCTCTTAAACGTAGCTATTAGGTGTGACCTACCATCCCCACTCAATCGGCCTGTGAATTTGCCAGCGGCATTCCTGCCACGAGCAACTAAATATCCGGTGATACTCCACCACACAGGAGGCCGGAACACCGTTGTACAGGCACGTCCCCATGCTTACTAGAATGAGACTAGTCCTTTGTGAGGTAGTAGCGGTACGACTCCCAAGGCCTGCCATGTTACTATCAAACTGGGCCTTGCTCAGCATGTTCGTGGTTTCATGGTTTATAGGCACCACGTAAAATTCGACTACATCGGGTGGTCCCAGCCTGCTTATTCGGCGCGTTCGGGCGACCCGGAGATCACCCGGGTACTAGCGCCCCGTGATAGAGGCTGAGCTGCCATTTCGGGCCATCTCTGATGGTTGACTAAATTACGCCTTACTTGACTCGATCCAAAAGTAGGCACGGTGGTCGTGCTATGTTATAGAACGTATTAAGACTTTTGACCGTCATTGCTTCCAACTATGGGCGCCGACCCGGTATTCCGCCAAACTGTATAAGCAGCTCGCCCTGGCCCGGCTGAGCTAGTTAGGGCTGGCGTCTATCAACATTACGGCATACAGTCGACTCTGTGTTCTAACCAACTGTGGCGCCGAACAATCCTCTGACCCTGGCATTTGATACGTTTTTAATTTCAAATTCCCTATGACCAGCTTTCGCGATAAAAACCCTGTTTCCATCCTCGAGGATTAGACCGCGTGATAGGATCCAGAAACACCATTTTTCGCCATCTTGAGTTCTGCGTTATCGTGACTACCGATCACTCGATAACAAGTAC'
# Parse the embedded FASTA-style text, then report the ID and GC percentage
# of the sequence with the highest GC content.
processed_list = make_list_of_seqs(big_string_to_check)
# Parenthesised so the line is valid on Python 3; on Python 2 it still prints
# the same tuple because the parentheses wrap a single expression.
print(get_winner(processed_list))