p3 Python Project

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1of 4

"""This program ask the user to enter a number of DNA sequences and finds the

consensus sequence. The output is the consensus.


Add the corresponding code to accomplish the requested tasks
"""

##### ADD YOUR NAME, Student ID, and Section number #######
# NAME: DANIELLA VARGAS FIGUEROA
# STUDENT ID:802228453
# SECTION:096
###########################################################

# The function load_data takes the DNA sequences as input, saves them in a
# list and returns the list.
# a: is the number of sequences to be input

#Auxiliar functions

def valid_seq(seq):
    """Return True when *seq* is a non-empty run of uppercase DNA bases.

    Only the characters 'A', 'C', 'G' and 'T' are accepted. An empty
    sequence is reported as invalid.

    seq: the candidate DNA sequence (a string).
    """
    # all() is vacuously True for an empty sequence, so emptiness is checked
    # explicitly to keep rejecting blank lines.
    return len(seq) > 0 and all(base in ("A", "C", "G", "T") for base in seq)

# max_nuc() takes four inputs, the frequency of each nucleotide in a column,
# and returns a list of two elements containing the most frequent nucleotide
# and its frequency in that column.
def max_nuc(freq_a, freq_g, freq_c, freq_t):
    """Return the most frequent nucleotide of a column and its frequency.

    freq_a, freq_g, freq_c, freq_t: how many times 'A', 'G', 'C' and 'T'
    appear in the column.

    Returns a two-element list ``[nucleotide, frequency]``.

    When several nucleotides share the highest frequency, the first one in
    the order A, G, C, T is returned. Strict comparisons would leave a tied
    column without any result (None).
    """
    candidates = (
        ("A", freq_a),
        ("G", freq_g),
        ("C", freq_c),
        ("T", freq_t),
    )
    # max() keeps the first maximal item it meets, which gives the
    # A, G, C, T tie-breaking order described above.
    nucleotide, frequency = max(candidates, key=lambda pair: pair[1])
    return [nucleotide, frequency]

#########################
# load_data() takes two inputs, the file name and the option, and returns one
# tuple: first the list of sequences, then the option (consensus or transcription).
def load_data(filename, option):
    """Read the DNA sequences stored one per line in *filename*.

    A line is kept only when valid_seq() accepts it and it has the same
    length as the first accepted sequence. Every other line is skipped.

    filename: path of the text file to read.
    option: the menu option chosen by the user; it is passed through
        unchanged.

    Returns the tuple ``(sequences, option)`` where ``sequences`` is the
    list of accepted sequences.
    """
    lst = []
    # Length of the first accepted sequence; None until one has been found.
    valid_length = None
    # The with-block guarantees the file is closed, even on an error.
    with open(filename, "r") as infile:
        for line in infile:
            # Drop the line ending, including the "\r" of Windows files,
            # which would otherwise make every sequence invalid.
            seq = line.rstrip("\r\n")
            if not valid_seq(seq):
                continue
            if valid_length is None:
                valid_length = len(seq)
            elif len(seq) != valid_length:
                continue
            lst.append(seq)
    return (lst, option)

# The function count_nucl_freq takes the list returned by load_data and finds
# the frequencies of the nucleotides for each column.
# a: is a list of DNA sequences
def count_nucl_freq(a):
    """Find the most frequent nucleotide of every column of *a*.

    a: a list of DNA sequences. The number of columns is taken from the
        first sequence.

    Returns a list with one max_nuc() result per column. An empty list of
    sequences gives an empty result.
    """
    # Without sequences there is no first sequence to take the width from.
    if not a:
        return []
    frequencies = []
    for i in range(len(a[0])):
        column = [seq[i] for seq in a]
        freq_a = column.count("A")
        freq_g = column.count("G")
        freq_c = column.count("C")
        # Anything that is not A, G or C is counted as T.
        freq_t = len(column) - freq_a - freq_g - freq_c
        frequencies.append(max_nuc(freq_a, freq_g, freq_c, freq_t))
    return frequencies
# analyze the list by columns
# find nucleotide frecuencies
# you will decide what data type, from the ones already explained, works best for
# your implementation
# return frecuencies

# The function find_consensus takes the result of count_nucl_freq and returns a
# consensus sequence.
# a: is the value you return in count_nucl_freq
def find_consensus(a):
    """Build the consensus sequence and write it to ``answer.txt``.

    a: a list of ``[nucleotide, frequency]`` pairs, one per column. Only
        the nucleotide (index 0) of each pair is used.

    Returns the consensus string, which is also the full content written
    to ``answer.txt``.
    """
    consensus = "".join(element[0] for element in a)
    # The with-block closes the file so the text is flushed to disk.
    with open("answer.txt", "w") as outfile:
        outfile.write(consensus)
    return consensus

# The function convert_seq takes one argument, the DNA sequence.


def convert_seq(a):
    """Convert one DNA sequence to its RNA transcription.

    Each base is replaced by its complement: A -> U, T -> A, C -> G and
    G -> C. Any other character is left out of the result.

    a: the DNA sequence (a string).

    Returns the RNA sequence as a string.
    """
    rna_of = {"A": "U", "T": "A", "C": "G", "G": "C"}
    # join() builds the string in one pass. The "" default drops any
    # character that has no entry in the table.
    return "".join(rna_of.get(let, "") for let in a)

# convert dna to rna sequences


# return rna sequences

# The function transcript_seq takes one argument, the list of sequences.


def transcript_seq(a):
    """Transcribe every DNA sequence to RNA and write them to ``answer.txt``.

    a: a list of DNA sequences.

    Each RNA sequence is written on its own line of ``answer.txt``.

    Returns the list of RNA sequences, in the same order as *a*.
    """
    rnaseq = [convert_seq(seq) for seq in a]
    # The with-block closes the file so every line is flushed to disk.
    with open("answer.txt", "w") as outfile:
        outfile.writelines(rna + "\n" for rna in rnaseq)
    return rnaseq

# Read list DNA sequences


# return list RNA Sequences

# The function main is where your program starts: it makes the function calls
# and writes a new file with the consensus or the transcription.
def main():
    """Ask for the input file and the task, then run the chosen task.

    Option 1 writes the consensus sequence to ``answer.txt``. Option 2
    writes the RNA transcription of every sequence. The function does not
    return anything.
    """
    filename = input("Write the name of the file: ")
    print('Select option:')
    print('1. Consensus Sequences')
    print('2. Transcriptions Sequences')
    # Keep asking until the answer is 1 or 2. Text that is not a number is
    # treated like any other wrong answer instead of stopping the program.
    while True:
        try:
            option = int(input(""))
        except ValueError:
            option = None
        if option in (1, 2):
            break
        print("Incorrect input. Only enter 1 or 2.")
    try:
        data = load_data(filename, option)
    except OSError as err:
        # A missing or unreadable file ends the run with a short message.
        print("Could not read the file:", err)
        return
    # Call the functions that match the option the user entered.
    if data[1] == 1:
        find_consensus(count_nucl_freq(data[0]))
    elif data[1] == 2:
        transcript_seq(data[0])

#ask the number DNA sequence


# contains the functions call
# function doesn't return anything

# Start the interactive program only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

You might also like