0% found this document useful (0 votes)
81 views10 pages

AI Lab1

Uploaded by

Fatima Malick
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
81 views10 pages

AI Lab1

Uploaded by

Fatima Malick
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 10

Department of Computing

CS370: Artificial Intelligence Class: BSCS-9AB


Lab 01: Introduction to Python

Date: 12-03-2021
Time: 10:00-13:00 (A) and 14:00-17:00 (B)
Instructor: Dr. Hashir Kiani
Lab Engineer: Ms Shakeela Bibi

Submitted By:

Fatima Seemab

291310

Lab 1

TASK 1
1. Write a Python program that takes two strings as input and calculates the
Levenshtein (edit) distance between the two strings.

CODE

import numpy

# Running totals updated by checkedits(); the script prints them at the end.
no_of_insertion = 0
no_of_deletion = 0
no_of_substitution = 0


def checkedits(i, j, table=None, source=None, target=None):
    """Trace the edit-distance table back from cell (i, j) and count the edits.

    The table is laid out the way the script fills it: rows follow the
    characters of ``target`` (string 2) and columns follow the characters of
    ``source`` (string 1), so cell (i, j) holds the distance between
    ``source[:j]`` and ``target[:i]``.  Moving left drops a source character
    (deletion), moving up adds a target character (insertion) and moving
    diagonally either keeps a matching character or substitutes one.

    Args:
        i: Row to start from (normally ``len(target)``).
        j: Column to start from (normally ``len(source)``).
        table: Filled distance table; defaults to the module-level ``arr``.
        source: String 1; defaults to the module-level ``inp``.
        target: String 2; defaults to the module-level ``out``.

    Returns:
        ``(insertions, deletions, substitutions)`` found by this call.  The
        same amounts are also added to the module-level counters
        ``no_of_insertion``, ``no_of_deletion`` and ``no_of_substitution``.
    """
    global no_of_insertion, no_of_deletion, no_of_substitution

    if table is None:
        table = arr
    if source is None:
        source = inp
    if target is None:
        target = out

    insertions = deletions = substitutions = 0
    print("checkedits index", i, j)

    # Keep walking until BOTH indices reach 0.  Stopping as soon as one of
    # them hits 0 would silently drop the leading insertions/deletions that
    # are still recorded along the first row or column of the table.
    while i > 0 or j > 0:
        print("value of i and j inside while is:", i, j)
        inside = i > 0 and j > 0

        if inside and source[j - 1] == target[i - 1]:
            # Matching characters cost nothing: step diagonally.
            print("same words")
            print(source[j - 1])
            print(target[i - 1])
            i -= 1
            j -= 1
        elif inside and table[i][j] == table[i - 1][j - 1] + 1:
            print("condition 2 hit substitution")
            print(source[j - 1])
            print(target[i - 1])
            substitutions += 1
            i -= 1
            j -= 1
        elif j > 0 and (i == 0 or table[i][j] == table[i][j - 1] + 1):
            # On the first row only deletions are possible.
            print("condition 1 hit")
            print(source[j - 1])
            deletions += 1
            j -= 1
        else:
            # Either we are on the first column or the step came from above.
            print("condition 3 hit")
            print(target[i - 1])
            insertions += 1
            i -= 1

    no_of_insertion += insertions
    no_of_deletion += deletions
    no_of_substitution += substitutions
    return insertions, deletions, substitutions

# Task 1 driver: read two strings, fill the edit-distance table and report
# the distance together with the edit counts gathered by checkedits().

# ``raw_input`` only exists on Python 2; fall back to ``input`` so the script
# also runs on Python 3 instead of failing with a NameError.
try:
    read_line = raw_input
except NameError:
    read_line = input

inp = read_line("Enter string 1:")
out = read_line("Enter string 2:")

# Rows follow string 2 and columns follow string 1; the extra row/column
# stands for the empty prefix.
row = len(out) + 1
col = len(inp) + 1

# Edit distances are whole numbers, so use an integer table (the default
# float dtype would report the distance as e.g. ``3.0``).
arr = numpy.zeros((row, col), dtype=int)
print(arr)

# Turning a prefix into the empty string (or back) costs its length.
for j in range(col):
    arr[0][j] = j
for i in range(row):
    arr[i][0] = i

for i in range(1, row):
    for j in range(1, col):
        if inp[j - 1] == out[i - 1]:
            # Same character: no extra cost over the two shorter prefixes.
            arr[i][j] = arr[i - 1][j - 1]
        else:
            # Cheapest of substitution, insertion and deletion, plus one.
            arr[i][j] = 1 + min(arr[i - 1][j - 1], arr[i - 1][j], arr[i][j - 1])

print(arr)
print(row - 1, col - 1)
checkedits(row - 1, col - 1)
print("The leivenstrein distance is", arr[row-1][col-1])
print("Number of insertion:", no_of_insertion)
print("Number of Deletion:", no_of_deletion)
print("Number of substitution in leivenstrein distance are",
      no_of_substitution)

OUTPUT
Task #2

Now modify the program written above so that it takes two text files, named reference.txt and
hypothesis.txt, each containing a single-line, lowercase English sentence, and outputs a file
result.txt containing the Levenshtein distance between the two files, as shown below. The
distance should be computed at the word level, not the character level.

CODE

import numpy
# Running totals updated by checkedits(); the script reports them at the end.
no_of_insertion = 0
no_of_deletion = 0
no_of_substitution = 0


def checkedits(i, j, table=None, reference_words=None, hypothesis_words=None):
    """Trace the word-level distance table back from (i, j) and count edits.

    The table is laid out the way the script fills it: rows follow the words
    of the hypothesis and columns follow the words of the reference, so cell
    (i, j) holds the distance between the first ``j`` reference words and the
    first ``i`` hypothesis words.  Moving left drops a reference word
    (deletion), moving up adds a hypothesis word (insertion) and moving
    diagonally either keeps a matching word or substitutes one.

    Args:
        i: Row to start from (normally the number of hypothesis words).
        j: Column to start from (normally the number of reference words).
        table: Filled distance table; defaults to the module-level ``arr``.
        reference_words: Defaults to the module-level ``file1words``.
        hypothesis_words: Defaults to the module-level ``file2words``.

    Returns:
        ``(insertions, deletions, substitutions)`` found by this call.  The
        same amounts are also added to the module-level counters
        ``no_of_insertion``, ``no_of_deletion`` and ``no_of_substitution``.
    """
    global no_of_insertion, no_of_deletion, no_of_substitution

    if table is None:
        table = arr
    if reference_words is None:
        reference_words = file1words
    if hypothesis_words is None:
        hypothesis_words = file2words

    insertions = deletions = substitutions = 0
    print("checkedits index", i, j)

    # Keep walking until BOTH indices reach 0.  Stopping as soon as one of
    # them hits 0 would silently drop the leading insertions/deletions that
    # are still recorded along the first row or column of the table.
    while i > 0 or j > 0:
        print("value of i and j inside while is:", i, j)
        inside = i > 0 and j > 0

        if inside and reference_words[j - 1] == hypothesis_words[i - 1]:
            # Matching words cost nothing: step diagonally.
            print("same words")
            print(reference_words[j - 1])
            print(hypothesis_words[i - 1])
            i -= 1
            j -= 1
        elif inside and table[i][j] == table[i - 1][j - 1] + 1:
            print("condition 2 hit substitution")
            print(reference_words[j - 1])
            print(hypothesis_words[i - 1])
            substitutions += 1
            i -= 1
            j -= 1
        elif j > 0 and (i == 0 or table[i][j] == table[i][j - 1] + 1):
            # On the first row only deletions are possible.
            print("condition 1 hit")
            print(reference_words[j - 1])
            deletions += 1
            j -= 1
        else:
            # Either we are on the first column or the step came from above.
            print("condition 3 hit")
            print(hypothesis_words[i - 1])
            insertions += 1
            i -= 1

    no_of_insertion += insertions
    no_of_deletion += deletions
    no_of_substitution += substitutions
    return insertions, deletions, substitutions

# Task 2 driver: word-level edit distance between reference.txt and
# hypothesis.txt, reported on the console and in result.txt.

# ``with`` closes each input file as soon as it has been read.
with open("reference.txt", "rt") as reference:
    file1data = reference.read()
file1words = file1data.split()

with open("hypothesis.txt", "rt") as hypothesis:
    file2data = hypothesis.read()
file2words = file2data.split()

print(len(file2words))
print(len(file1words))

# Rows follow the hypothesis words and columns follow the reference words;
# the extra row/column stands for the empty prefix.
row = len(file2words) + 1
col = len(file1words) + 1

print(row)
print(col)

# Edit distances are whole numbers, so use an integer table.
arr = numpy.zeros((row, col), dtype=int)
print(arr)

# Turning a prefix into the empty sentence (or back) costs its length.
for j in range(col):
    arr[0][j] = j
for i in range(row):
    arr[i][0] = i

for i in range(1, row):
    for j in range(1, col):
        if file2words[i - 1] == file1words[j - 1]:
            # Same word: no extra cost over the two shorter prefixes.
            arr[i][j] = arr[i - 1][j - 1]
        else:
            # Cheapest of substitution, insertion and deletion, plus one.
            arr[i][j] = 1 + min(arr[i - 1][j - 1], arr[i - 1][j], arr[i][j - 1])

print(arr)
print(row - 1, col - 1)
checkedits(row - 1, col - 1)

# Convert to a plain int: repr() of a NumPy scalar is version dependent
# (NumPy 2 renders it as ``np.float64(3.0)``), which would leak into the file.
distance = int(arr[row - 1][col - 1])

# Overwrite rather than append so result.txt always reflects this run only,
# and put every figure on its own line.
with open("result.txt", "w") as f:
    f.write("The leivenstrein distance is " + str(distance) + "\n")
    f.write("Number of insertion: " + str(no_of_insertion) + "\n")
    f.write("Number of Deletion: " + str(no_of_deletion) + "\n")
    f.write("Number of Substitution: " + str(no_of_substitution) + "\n")

print("The leivenstrein distance is", distance)
print("Number of Insertion:", no_of_insertion)
print("Number of Deletion:", no_of_deletion)
print("Number of Substitutions :", no_of_substitution)

OUTPUT

output on console

Screenshot from result.txt

Task #3
Now modify the above program so that it ignores the 10 common words listed below, as follows:
➢ Insertions and deletions involving these common words are ignored.
➢ Substitutions are ignored when both the initial and the final word are among the 10 common words.

List of 10 common words:

the, of, and, a, be, this, there, an, been, some

Now the result2.txt should look like:

CODE

import numpy

# Running totals updated by checkedits(); the script reports them at the end.
no_of_insertion = 0
no_of_deletion = 0
no_of_substitution = 0

# Words whose insertion/deletion is free, and whose substitution is free when
# both the old and the new word are in this list.
common_words = ["the", "of", "and", "a", "be", "this", "there", "an", "been",
                "some"]


def checkedits(i, j, table=None, reference_words=None, hypothesis_words=None,
               ignored=None):
    """Trace the word-level table back from (i, j), skipping common words.

    Rows of the table follow the hypothesis words and columns follow the
    reference words.  Moving left drops a reference word (deletion), moving
    up adds a hypothesis word (insertion) and moving diagonally keeps or
    substitutes a word.  An insertion or deletion of an ignored word is not
    counted; a substitution is not counted only when both words are ignored.

    Each step is checked against the table using those same costs (0 for an
    ignored edit, 1 otherwise), so the table is expected to have been filled
    with them.  If no step explains a cell, the walk moves diagonally without
    counting anything so that it always terminates.

    Args:
        i: Row to start from (normally the number of hypothesis words).
        j: Column to start from (normally the number of reference words).
        table: Filled distance table; defaults to the module-level ``arr``.
        reference_words: Defaults to the module-level ``file1words``.
        hypothesis_words: Defaults to the module-level ``file2words``.
        ignored: Words to ignore; defaults to ``common_words``.

    Returns:
        ``(insertions, deletions, substitutions)`` counted by this call.  The
        same amounts are also added to the module-level counters
        ``no_of_insertion``, ``no_of_deletion`` and ``no_of_substitution``.
    """
    global no_of_insertion, no_of_deletion, no_of_substitution

    if table is None:
        table = arr
    if reference_words is None:
        reference_words = file1words
    if hypothesis_words is None:
        hypothesis_words = file2words
    ignored_set = frozenset(common_words if ignored is None else ignored)

    insertions = deletions = substitutions = 0
    print("checkedits index", i, j)

    # Keep walking until BOTH indices reach 0 so that edits recorded along
    # the first row or column are not lost.
    while i > 0 or j > 0:
        print("value of i and j inside while is:", i, j)
        inside = i > 0 and j > 0
        ref_word = reference_words[j - 1] if j > 0 else None
        hyp_word = hypothesis_words[i - 1] if i > 0 else None

        # Explicit membership tests: ``a or b not in words`` would be true
        # for every non-empty ``a`` and never ignore anything.
        del_cost = 0 if ref_word in ignored_set else 1
        ins_cost = 0 if hyp_word in ignored_set else 1
        # A substitution is free only when both words are ignored.
        sub_cost = max(del_cost, ins_cost)

        if inside and ref_word == hyp_word:
            print("same words")
            print(ref_word)
            print(hyp_word)
            i -= 1
            j -= 1
        elif j > 0 and (i == 0 or table[i][j] == table[i][j - 1] + del_cost):
            # On the first row only deletions are possible.
            print("condition 1 hit")
            print(ref_word)
            deletions += del_cost
            j -= 1
        elif inside and table[i][j] == table[i - 1][j - 1] + sub_cost:
            print("condition 2 hit substitution")
            print(ref_word)
            print(hyp_word)
            substitutions += sub_cost
            i -= 1
            j -= 1
        elif i > 0 and (j == 0 or table[i][j] == table[i - 1][j] + ins_cost):
            # On the first column only insertions are possible.
            print("condition 3 hit")
            print(hyp_word)
            insertions += ins_cost
            i -= 1
        else:
            # The table does not match any step; move on without counting.
            print("Extra Condition")
            print(ref_word)
            print(hyp_word)
            i -= 1
            j -= 1

    no_of_insertion += insertions
    no_of_deletion += deletions
    no_of_substitution += substitutions
    return insertions, deletions, substitutions

# Task 3 driver: word-level edit distance between reference.txt and
# hypothesis.txt in which edits on the common words are free, reported on
# the console and in result2.txt.

# ``with`` closes each input file as soon as it has been read.
with open("reference.txt", "rt") as reference:
    file1data = reference.read()
file1words = file1data.split()

with open("hypothesis.txt", "rt") as hypothesis:
    file2data = hypothesis.read()
file2words = file2data.split()

print(len(file2words))
print(len(file1words))

# Rows follow the hypothesis words and columns follow the reference words;
# the extra row/column stands for the empty prefix.
row = len(file2words) + 1
col = len(file1words) + 1

print(row)
print(col)

# Cost of deleting each reference word / inserting each hypothesis word:
# 0 for a common word, 1 otherwise.
ignored_words = frozenset(common_words)
ref_cost = [0 if word in ignored_words else 1 for word in file1words]
hyp_cost = [0 if word in ignored_words else 1 for word in file2words]

# Edit distances are whole numbers, so use an integer table.
arr = numpy.zeros((row, col), dtype=int)
print(arr)

# First row/column: delete (or insert) every word of the prefix, paying only
# for the words that are not common.
for j in range(1, col):
    arr[0][j] = arr[0][j - 1] + ref_cost[j - 1]
for i in range(1, row):
    arr[i][0] = arr[i - 1][0] + hyp_cost[i - 1]

for i in range(1, row):
    for j in range(1, col):
        if file2words[i - 1] == file1words[j - 1]:
            # Same word: no extra cost over the two shorter prefixes.
            arr[i][j] = arr[i - 1][j - 1]
        else:
            # One assignment per cell, taking the cheapest of the three
            # edits.  A substitution is free only when BOTH words are
            # common, i.e. when both individual costs are 0.
            arr[i][j] = min(
                arr[i - 1][j - 1] + max(ref_cost[j - 1], hyp_cost[i - 1]),
                arr[i - 1][j] + hyp_cost[i - 1],
                arr[i][j - 1] + ref_cost[j - 1],
            )

print(arr)
print(row - 1, col - 1)
checkedits(row - 1, col - 1)

# Convert to a plain int: repr() of a NumPy scalar is version dependent
# (NumPy 2 renders it as ``np.float64(3.0)``), which would leak into the file.
distance = int(arr[row - 1][col - 1])

# Overwrite rather than append so result2.txt always reflects this run only,
# and put every figure on its own line.
with open("result2.txt", "w") as f:
    f.write("The leivenstrein distance is " + str(distance) + "\n")
    f.write("Number of insertion: " + str(no_of_insertion) + "\n")
    f.write("Number of Deletion: " + str(no_of_deletion) + "\n")
    f.write("Number of Substitution: " + str(no_of_substitution) + "\n")

print("The leivenstrein distance is", distance)
print("Number of Insertion:", no_of_insertion)
print("Number of Deletion:", no_of_deletion)
print("Number of Substitution:", no_of_substitution)

OUTPUT
Console output

Output from Result2.txt

You might also like