0% found this document useful (0 votes)
58 views

University of Mauritius

This document summarizes an assignment for the course AGRI 2081Y - Computational Biology offered at the University of Mauritius, Faculty of Agriculture. The assignment was completed by Marie Natacha Meunier with student ID 1712892 and submitted to the lecturer Dr Shakuntala Baichoo on 25th May 2020. The assignment contains code snippets and answers to computational biology questions involving string manipulation of DNA, RNA and protein sequences.

Uploaded by

grace meunier
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
58 views

University of Mauritius

This document summarizes an assignment for the course AGRI 2081Y - Computational Biology offered at the University of Mauritius, Faculty of Agriculture. The assignment was completed by Marie Natacha Meunier with student ID 1712892 and submitted to the lecturer Dr Shakuntala Baichoo on 25th May 2020. The assignment contains code snippets and answers to computational biology questions involving string manipulation of DNA, RNA and protein sequences.

Uploaded by

grace meunier
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/9

UNIVERSITY OF MAURITIUS

FACULTY OF AGRICULTURE
BSc (Hons) Biotechnology

AGRI 2081Y (3) - COMPUTATIONAL BIOLOGY

Name of Student: Marie Natacha Meunier

Student I.D: 1712892

Date: 25th May 2020

Lecturer Name: Dr Shakuntala Baichoo


chain_a = """SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKM
FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVV
RRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFR
HSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILT
IITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKG
EPHHELPPGSTKRALPNNT"""

#Question 1 a
# Count lines, not newline characters: the last line has no trailing "\n",
# so chain_a.count("\n") would under-report the number of lines by one.
num_lines = len(chain_a.splitlines())
print(num_lines)

#Question 1 b
# The sequence length excludes the newline separators used for layout.
length = len(chain_a) - chain_a.count("\n")
print("length sequence: ", length)

#Question 1 c
new_chain = chain_a.replace("\n", "")
print("New Chain:", new_chain)

#Question 1 d
# Newlines are never "C", so counting on the cleaned chain gives the same
# answer as looping over chain_a character by character.
count = new_chain.count("C")
print("Number of C:", count)

#Question 1 e
# Search the newline-free chain: in chain_a a motif could be split by a line
# break, and every preceding "\n" would shift the reported position.
motif = "NLRVEYLDDRN"
pos = new_chain.find(motif)  # 0-based index, -1 when the motif is absent
if pos != -1:
    print("yes found")
    print("Starting position :", pos)
else:
    print("not found")
#Question 2

# Line breaks (and the stray space in the last line) are layout only, not part
# of the sequence, so all whitespace is stripped before any counting/slicing.
dna_seq = "".join("""GGGCTTGTGGCGCGAGCTTCTGAAACTAGGCGGCAGAGGCGGAGCCGCT
GTGGCACTGCTGCGCCTCTGCTGCGCCTCGGGTGTCTTTT
GCGGCGGTGGGTCGCCGCCGGGAGAAGCGTGAGGGGACAG
ATTTGTGACCGGCGCGGTTTTTGTCAGCTTACTCCGGCCA AAAAAGAACTGCACCTCTGGAGCGG""".split())

#Question 2 a

# Count the number of C's in DNA sequence
no_c = dna_seq.count("C")

# Count the number of G's in DNA sequence
no_g = dna_seq.count("G")

#determine the length of the DNA sequence
dna_length = len(dna_seq)

#compute the GC content
# Expressed as a percentage of all bases; 100.0 forces float arithmetic.
gc_cont = 100.0 * (no_g + no_c) / dna_length

#Question 2 b
# Transcription: every thymine becomes uracil.
rna_seq = dna_seq.replace("T", "U")

#Question 2 c
# The intron occupies 0-based positions 50..155; the exons are what remains on
# either side, and the spliced product is the two exons joined.
intron = dna_seq[50:156]
exon1 = dna_seq[0:50]
exon2 = dna_seq[156:]
spliced = exon1 + exon2

#Question 3
import re  # needed by every findall below; the script never imported it

clusters = """\
>Cluster 0
0 >YLR106C at 100.00%
>Cluster 50
0 >YPL082C at 100.00%
>Cluster 54
0 >YHL009W-A at 90.80%
1 >YHL009W-B at 100.00%
2 >YJL113W at 98.77%
3 >YJL114W at 97.35%
>Cluster 52
0 >YBR208C at 100.00%
"""

#Question 3 a
# Cluster IDs are the digits after each ">Cluster" header. Anchoring on the
# full word and capturing only digits avoids the leading space that the
# earlier pattern (optional "r", then "[ \d.]+") pulled into every match.
cluster_ids = re.findall(r"^>Cluster\s+(\d+)", clusters, re.MULTILINE)
result = cluster_ids
print("ID :", result)

#Question 3 b
# Mask the cluster headers so that the only remaining ">" characters are the
# ones that introduce gene names.
r = clusters.replace('>Cluster', 'Test')
gene_names = re.findall(r">([A-Za-z0-9-]+)", r)
percentages = re.findall(r"at ([\d.]+)%", r)
result = gene_names

# All gene names followed by all identity percentages, as one flat list.
per = gene_names + percentages

# One [gene, percentage] pair per member line; header and blank lines yield no
# match and are skipped instead of being printed as empty lists.
lines = r.split('\n')
for line in lines:
    fields = (re.findall(r">([A-Za-z0-9-]+)", line)
              + re.findall(r"at ([\d.]+)%", line))
    if fields:
        print(fields)
#Question 4

# Pairwise ratios keyed by a (base, base) tuple.
ratios = {
    ("A", "T"): 10.0 / 5.0,
    ("A", "C"): 10.0 / 7.0,
    ("A", "G"): 10.0 / 6.0,
    ("T", "C"): 5.0 / 7.0,
    ("T", "G"): 5.0 / 6.0,
    ("C", "G"): 7.0 / 6.0,
}

#Question 4 a

# There is no difference between len(ratios), len(ratios.keys()),
# len(ratios.values()) and len(ratios.items()): each one counts the entries of
# the dictionary, and every entry has exactly one key and one value.
print(len(ratios.keys()))
print(len(ratios.values()))
print(len(ratios.items()))

#Question 4 b

# The two pairs whose presence in ratios is checked.
ratio = {("A", "T"): 10.0 / 5.0, ("C", "G"): 7.0 / 6.0}

for pair in ratio:
    # "in" on a dict tests keys, so this asks whether the pair is a key.
    if pair in ratios:
        print("yes '%s, %s' is found in ratios" % pair)
    else:
        print("No '%s, %s' is not found in ratios" % pair)

#Question 4 c

# 10.0 / 5.0 == 2.0 and 2.0 == 2, so the integer 2 is found among the values.
contains_2 = 2 in ratios.values()
print(contains_2)

contains_3 = 3 in ratios.values()
print(contains_3)

#Question 4 d

# A (key, value) tuple is in items() only if that key maps to that value.
at_is_2 = (("A", "T"), 2) in ratios.items()
print(at_is_2)

cg_is_1000 = (("C", "G"), 1000) in ratios.items()
print(cg_is_1000)

#Question 4 e

# Split the (key, value) pairs back into two parallel lists.
keys = [key for key, _ in ratios.items()]
values = [value for _, value in ratios.items()]
#Question 5

#translate the list:

# Named "bases" rather than "list" so the builtin list type is not shadowed.
bases = ["A", "T", "T", "A", "G", "T", "C"]

# One-letter nucleotide code -> three-letter abbreviation.
translation = {
    "A": "ade",
    "T": "tym",
    "G": "gua",
    "C": "cyt",
}

# Look each base up in the table instead of typing the answer out by hand.
translated = [translation[base] for base in bases]

String = " ".join(translated)


def listToString(items):
    """Return the strings in *items* joined by single spaces."""
    return " ".join(items)


s = [", ".join(bases), "for", ", ".join(translated)]

print(listToString(s))
#Question 6

# A python program to read a FASTA file.
# NOTE(review): the assignment text names the file data.fasta, but the code
# opens 'fasta_file.txt' - confirm which name is intended.

text=""">2HMI:A|PDBID|CHAIN|SEQUENCE

PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI

>2HMI:B|PDBID|CHAIN|SEQUENCE

PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI

>2HMI:C|PDBID|CHAIN|SEQUENCE

DIQMTQTTSSLSASLGDRVTISCSASQDISSYLNWYQQKPEGTVKLLIYY

>2HMI:D|PDBID|CHAIN|SEQUENCE

QITLKESGPGIVQPSQPFRLTCTFSGFSLSTSGIGVTWIRQPSGKGLEWL

>2HMI:E|PDBID|CHAIN|SEQUENCE

ATGGCGCCCGAACAGGGAC

>2HMI:F|PDBID|CHAIN|SEQUENCE

GTCCCTGTTCGGGCGCCA"""


def parse_fasta(fasta_text):
    """Return a dict mapping each FASTA header to its sequence.

    A header is a line starting with ">" (the ">" is dropped from the key).
    The non-blank lines that follow, up to the next header, are concatenated
    into that record's sequence. Blank lines are skipped, and sequence lines
    that appear before the first header are ignored.
    """
    chunks = {}
    header = None
    for raw_line in fasta_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            header = line[1:]
            chunks[header] = []
        elif header is not None:
            chunks[header].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


# Records parsed from the inline sample above.
records = parse_fasta(text)

# Read the same format from disk. "with" closes the handle even on error, and
# a missing or unreadable file is reported instead of aborting the script.
file_records = {}
try:
    with open('fasta_file.txt') as fastaFile:
        file_records = parse_fasta(fastaFile.read())
except OSError as err:
    print("Could not read fasta_file.txt:", err)

You might also like