Index: Sr. No. | Practical Name | Date of Performance | Page No. | Sign
Program:
# Define the documents
document1 = "The quick brown fox jumped over the lazy dog."
document2 = "The lazy dog slept in the sun."
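The listing defines only the two documents before the output; a minimal sketch of the boolean term-incidence step that presumably follows (the tokenization and variable names here are assumptions, not the original code):
# A hedged sketch: tokenize each document and build boolean incidence vectors
import string

def tokenize(doc):
    # lowercase and strip punctuation before splitting into terms
    return doc.lower().translate(str.maketrans('', '', string.punctuation)).split()

vocabulary = sorted(set(tokenize(document1)) | set(tokenize(document2)))
vector1 = [1 if term in tokenize(document1) else 0 for term in vocabulary]
vector2 = [1 if term in tokenize(document2) else 0 for term in vocabulary]
print(vocabulary)
print(vector1)
print(vector2)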
Output:
Program:
import pandas
from contextlib import redirect_stdout
terms = []
keys = []
vec_Dic = {}
dicti = {}
dummy_List = []
# list for performing some operations and clearing them
def filter(documents, rows, cols):
    for i in range(rows):
        for j in range(cols):
            if j == 0:
                # first column has the name of the document in the csv file
                keys.append(documents.loc[i].iat[j])
            else:
                # dummy list collects the terms of the current document
                dummy_List.append(documents.loc[i].iat[j])
                # add the term to the vocabulary if it is not already present
                if documents.loc[i].iat[j] not in terms:
                    terms.append(documents.loc[i].iat[j])
        # adding the key-value pair (document name -> its terms) to the dictionary
        copy = dummy_List.copy()
        dicti.update({documents.loc[i].iat[0]: copy})
        # clearing the dummy list for the next document
        dummy_List.clear()
def bool_Representation(dicti, rows, cols):
    terms.sort()
    for i in dicti:
        for j in terms:
            # if the term is present in the document we append 1 else we append 0
            if j in dicti[i]:
                dummy_List.append(1)
            else:
                dummy_List.append(0)
        # copying the dummy list to a different list to obtain the boolean vector
        copy = dummy_List.copy()
        vec_Dic.update({i: copy})
        # clearing the dummy list so vectors do not accumulate across documents
        dummy_List.clear()
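The listing stops after the two helper functions; a minimal sketch of the driver that would tie them together (the CSV filename is an assumption):
# A hedged sketch of the missing driver code (the CSV filename is an assumption)
documents = pandas.read_csv("documents.csv", header=None)
rows, cols = documents.shape
filter(documents, rows, cols)
bool_Representation(dicti, rows, cols)
print(vec_Dic)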
Output:
Precision
from sklearn.metrics import precision_score
# define actual
act_pos = [1 for _ in range(100)]
act_neg = [0 for _ in range(10000)]
y_true = act_pos + act_neg
# define predictions
pred_pos = [0 for _ in range(10)] + [1 for _ in range(90)]  # 90 positive predictions, 10 negative predictions
pred_neg = [1 for _ in range(30)] + [0 for _ in range(9970)]  # 30 positive predictions, 9970 negative predictions
y_pred = pred_pos + pred_neg
# calculate precision
precision = precision_score(y_true, y_pred, average='binary')
print('Precision: %.3f' % precision)
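As a check on the output: with 90 true positives and 30 false positives, precision = TP / (TP + FP) = 90 / (90 + 30) = 0.750.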
Output:
Recall
from sklearn.metrics import recall_score
# define actual
act_pos = [1 for _ in range(100)]
act_neg = [0 for _ in range(10000)]
y_true = act_pos + act_neg
# define predictions
pred_pos = [0 for _ in range(10)] + [1 for _ in range(90)]  # 90 positive predictions, 10 negative predictions
pred_neg = [0 for _ in range(10000)]  # all actual negatives predicted negative
y_pred = pred_pos + pred_neg
# calculate recall
recall = recall_score(y_true, y_pred, average='binary')
print('Recall: %.3f' % recall)
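Here recall = TP / (TP + FN) = 90 / (90 + 10) = 0.900, since 10 of the 100 actual positives are missed.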
F-measure
from sklearn.metrics import f1_score
# define predictions
pred_pos = [0 for _ in range(5)] + [1 for _ in range(95)]  # 95 positive predictions, 5 negative predictions
pred_neg = [1 for _ in range(55)] + [0 for _ in range(9945)]  # 55 positive predictions, 9945 negative predictions
y_pred = pred_pos + pred_neg  # combine predicted positive and negative labels
# calculate F1 score
score = f1_score(y_true, y_pred, average='binary')
print('F-Measure: %.3f' % score)
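With precision = 95 / (95 + 55) ≈ 0.633 and recall = 95 / 100 = 0.950, the F-measure works out to 2PR / (P + R) = 0.760.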
Output:
# Sentiment classification with scikit-learn; the original listing omits the
# setup code, so the labels, split, and classifier below are assumptions
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
# Sample dataset
texts = [
    "I love this movie, it's amazing!",
    "What a great day, I feel so happy!",
    "This is the worst product I've ever bought.",
    "I'm so sad and disappointed.",
    "Absolutely fantastic experience, will buy again!",
    "Terrible customer service, very unhappy.",
    "I'm excited about this new opportunity!",
    "The food was awful and I got sick."
]
# Labels inferred from the sample texts: 1 = positive, 0 = negative
labels = [1, 1, 0, 0, 1, 0, 1, 0]
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.25, random_state=42)
# Convert the text to bag-of-words feature vectors
vectorizer = CountVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)
# Train a Naive Bayes classifier (assumed; the listing does not show which model was used)
classifier = MultinomialNB()
classifier.fit(X_train_vectors, y_train)
# Make predictions
y_pred = classifier.predict(X_test_vectors)
# Evaluate the predictions
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)
Output:
Program:
print(f"Crawling: {url}")
self.visited.add(url)
except Exception as e:
print(f"Failed to crawl {url}: {e}")
# Usage example
if name == " main ":
start_url = "https://www.tpointtech.com/"
crawler = SimpleWebCrawler(base_url=start_url, max_pages=5)
crawler.crawl(start_url)
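Only the tail of the crawl method survives in the listing above; a minimal self-contained sketch consistent with those fragments (the use of requests and BeautifulSoup, and the link-following logic, are assumptions rather than the original implementation):
# A hedged reconstruction of the missing SimpleWebCrawler class
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

class SimpleWebCrawler:
    def __init__(self, base_url, max_pages=5):
        self.base_url = base_url
        self.max_pages = max_pages
        self.visited = set()

    def crawl(self, url):
        # stop once the page budget is used up or the URL was already seen
        if len(self.visited) >= self.max_pages or url in self.visited:
            return
        try:
            print(f"Crawling: {url}")
            self.visited.add(url)
            response = requests.get(url, timeout=5)
            soup = BeautifulSoup(response.text, "html.parser")
            # follow links on the same site, resolving relative URLs
            for link in soup.find_all("a", href=True):
                next_url = urljoin(url, link["href"])
                if next_url.startswith(self.base_url):
                    self.crawl(next_url)
        except Exception as e:
            print(f"Failed to crawl {url}: {e}")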
Program:
import numpy as np
import networkx as nx
def pagerank(graph, alpha=0.85, tol=1.0e-6, max_iter=100):
    """Computes PageRank scores for a directed graph."""
    n = len(graph)
    if n == 0:
        return {}
    nodes = list(graph.nodes())
    # Build the transition matrix (a reconstruction; the original loop body is
    # missing from the listing). Dangling nodes are treated as linking to every node.
    A = nx.to_numpy_array(graph, nodelist=nodes)
    A[A.sum(axis=1) == 0] = 1.0
    M = (A / A.sum(axis=1)[:, None]).T  # column-stochastic transition matrix
    # Initialize ranks
    ranks = np.ones(n) / n
    # Power iteration with damping factor alpha
    for _ in range(max_iter):
        new_ranks = alpha * M @ ranks + (1 - alpha) / n
        # Check for convergence (if the change in ranks is less than tolerance)
        if np.linalg.norm(new_ranks - ranks, ord=1) < tol:
            ranks = new_ranks
            break
        ranks = new_ranks
    return dict(zip(nodes, ranks))
# Build a small example web graph (assumed; the original listing does not show it)
web_graph = nx.DiGraph()
web_graph.add_edges_from([("A", "B"), ("A", "C"), ("B", "C"), ("C", "A")])
# Compute PageRank
ranks = pagerank(web_graph)
print(ranks)
Output: