Web Mining Lab Source Code 1-12 PRINT

The document contains multiple Python source code programs that implement various web mining algorithms and functionalities, including the PageRank algorithm, web scraping, sentiment analysis using VADER and TextBlob, and social network analysis using NetworkX. Each section includes function definitions, example usage, and comments explaining the purpose and parameters of the functions. The code demonstrates practical applications of data analysis, natural language processing, and network visualization.

Source Code:

import numpy as np

def page_rank(graph, d=0.85, max_iterations=100, tol=1e-6):
    """
    Implements the PageRank algorithm.

    Parameters:
    graph : 2D NumPy array
        Adjacency matrix representing the links between pages
        (1 if page i links to page j, else 0).
    d : float
        Damping factor (usually 0.85).
    max_iterations : int
        Maximum number of iterations for the algorithm to converge.
    tol : float
        Tolerance for convergence.

    Returns:
    page_ranks : 1D NumPy array
        The PageRank score for each page.
    """
    n = graph.shape[0]  # Number of pages/nodes

    # Create the transition matrix from the adjacency matrix
    outbound_links = np.sum(graph, axis=1)

    # Handle dangling nodes (pages with no outgoing links)
    for i in range(n):
        if outbound_links[i] == 0:
            graph[i] = np.ones(n)
            outbound_links[i] = n

    # Normalize the adjacency matrix to create the transition matrix
    transition_matrix = graph / outbound_links[:, None]

    # Initialize the PageRank scores to 1/n
    page_ranks = np.ones(n) / n

    for iteration in range(max_iterations):
        new_page_ranks = (1 - d) / n + d * np.dot(transition_matrix.T, page_ranks)
        # Check for convergence
        if np.linalg.norm(new_page_ranks - page_ranks) < tol:
            print(f"Converged after {iteration+1} iterations.")
            break
        page_ranks = new_page_ranks

    return page_ranks

# Example usage:
if __name__ == "__main__":
    # Define the adjacency matrix for the web graph
    web_graph = np.array([[0, 1, 1, 0],
                          [1, 0, 0, 1],
                          [0, 1, 0, 1],
                          [0, 0, 1, 0]])

    ranks = page_rank(web_graph)
    print("Page ranks:", ranks)
Output:
Source Code:

import numpy as np

def page_rank(graph, d=0.85, max_iterations=100, tol=1e-6):
    """
    Implements the PageRank algorithm.

    Parameters:
    graph : 2D NumPy array
        Adjacency matrix representing the links between pages
        (1 if page i links to page j, else 0).
    d : float
        Damping factor (usually 0.85).
    max_iterations : int
        Maximum number of iterations for the algorithm to converge.
    tol : float
        Tolerance for convergence.

    Returns:
    page_ranks : 1D NumPy array
        The PageRank score for each page.
    """
    n = graph.shape[0]  # Number of pages/nodes

    # Create the transition matrix from the adjacency matrix
    outbound_links = np.sum(graph, axis=1)

    # Handle dangling nodes (pages with no outgoing links)
    for i in range(n):
        if outbound_links[i] == 0:
            graph[i] = np.ones(n)
            outbound_links[i] = n

    # Normalize the adjacency matrix to create the transition matrix
    transition_matrix = graph / outbound_links[:, None]

    # Initialize the PageRank scores to 1/n
    page_ranks = np.ones(n) / n

    for iteration in range(max_iterations):
        new_page_ranks = (1 - d) / n + d * np.dot(transition_matrix.T, page_ranks)
        # Check for convergence
        if np.linalg.norm(new_page_ranks - page_ranks) < tol:
            print(f"Converged after {iteration+1} iterations.")
            break
        page_ranks = new_page_ranks

    return page_ranks

def analyze_link_structure(graph, page_ranks):
    """
    Analyzes the link structure of the web based on the PageRank scores.

    Parameters:
    graph : 2D NumPy array
        Adjacency matrix representing the links between pages.
    page_ranks : 1D NumPy array
        The PageRank scores for each page.
    """
    n = graph.shape[0]

    print("\n--- Link Structure ---")

    for i in range(n):
        outgoing_links = [j for j in range(n) if graph[i][j] == 1]
        incoming_links = [j for j in range(n) if graph[j][i] == 1]

        print(f"\nPage {i+1} (PageRank: {page_ranks[i]:.4f}):")
        print(f"  Links to pages: {', '.join(str(j+1) for j in outgoing_links)}")
        print(f"  Linked from pages: {', '.join(str(j+1) for j in incoming_links)}")

# Example usage:
if __name__ == "__main__":
    # Define the adjacency matrix for the web graph
    web_graph = np.array([[0, 1, 1, 0],
                          [1, 0, 0, 1],
                          [0, 1, 0, 1],
                          [0, 0, 1, 0]])

    # Calculate PageRank scores
    ranks = page_rank(web_graph)
    print("Page ranks:", ranks)

    # Analyze the link structure
    analyze_link_structure(web_graph, ranks)
Output:
Source Code:

# Required Libraries
import re
import requests
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import wordnet
from nltk import pos_tag
import nltk

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

def fetch_webpage(url):
    """
    Fetches the webpage content from the URL.
    """
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to fetch the webpage: Status code {response.status_code}")
        return None

def clean_html(raw_html):
    """
    Cleans the raw HTML and extracts text content.
    """
    soup = BeautifulSoup(raw_html, 'html.parser')
    text = soup.get_text()
    return text

def preprocess_text(text):
    """
    Performs various text preprocessing steps including:
    - Lowercasing
    - Removing punctuation/numbers
    - Tokenization
    - Stopword removal
    - Stemming/Lemmatization
    """
    # Lowercase the text
    text = text.lower()

    # Remove punctuation, numbers, and special characters
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)

    # Tokenize the text
    words = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]

    # Lemmatize the tokens (using part-of-speech tagging)
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag))
             for word, tag in pos_tag(words)]

    return words

def get_wordnet_pos(treebank_tag):
    """
    Helper function to convert POS tags to WordNet POS format for better lemmatization.
    """
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

def preprocess_webpage(url):
    """
    Fetches a webpage, cleans and preprocesses the text from it.
    """
    # Step 1: Fetch webpage
    raw_html = fetch_webpage(url)
    if raw_html:
        # Step 2: Clean HTML and extract text
        cleaned_text = clean_html(raw_html)

        # Step 3: Preprocess text
        preprocessed_words = preprocess_text(cleaned_text)
        return preprocessed_words
    else:
        return []

# Example usage:
if __name__ == "__main__":
    # URL of the webpage to preprocess
    url = "https://www.example.com"

    # Preprocess the webpage text
    processed_words = preprocess_webpage(url)

    print("Preprocessed Text Tokens:")
    print(processed_words)
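
A common next step after this kind of preprocessing is a term-frequency count over the cleaned tokens, for example before building an index or feature vectors. The following is a minimal sketch that reuses the processed_words list returned by the example usage above.

from collections import Counter

# Count how often each lemmatized token occurs and show the dominant terms.
term_counts = Counter(processed_words)
print("Top 10 terms:", term_counts.most_common(10))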
Output:
Source Code:

import networkx as nx
import matplotlib.pyplot as plt

def create_social_network():
    """
    Create a social network graph.
    Nodes represent individuals, and edges represent relationships between them.
    """
    G = nx.Graph()

    # Adding nodes (individuals)
    G.add_nodes_from(["Alice", "Bob", "Charlie", "David", "Eve", "Frank"])

    # Adding edges (relationships between individuals)
    G.add_edges_from([("Alice", "Bob"),
                      ("Alice", "Charlie"),
                      ("Bob", "David"),
                      ("Charlie", "David"),
                      ("David", "Eve"),
                      ("Eve", "Frank"),
                      ("Frank", "Alice")])

    return G

def plot_social_network(G):
    """
    Plot the social network graph.
    """
    plt.figure(figsize=(8, 8))
    pos = nx.spring_layout(G)  # Layout for the graph
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=2000,
            font_size=15, font_weight='bold', edge_color='gray')
    plt.title('Social Network', fontsize=20)
    plt.show()

def calculate_centrality_measures(G):
    """
    Calculate and print centrality measures for each node in the social network graph.
    """
    # Degree Centrality
    degree_centrality = nx.degree_centrality(G)

    # Closeness Centrality
    closeness_centrality = nx.closeness_centrality(G)

    # Betweenness Centrality
    betweenness_centrality = nx.betweenness_centrality(G)

    print("\nCentrality Measures:")
    for node in G.nodes():
        print(f"\nNode: {node}")
        print(f"  Degree Centrality: {degree_centrality[node]:.4f}")
        print(f"  Closeness Centrality: {closeness_centrality[node]:.4f}")
        print(f"  Betweenness Centrality: {betweenness_centrality[node]:.4f}")

def analyze_social_network():
    """
    Main function to create, visualize, and analyze the social network.
    """
    # Create the social network
    G = create_social_network()

    # Plot the network graph
    plot_social_network(G)

    # Calculate and display centrality measures
    calculate_centrality_measures(G)

# Example usage
if __name__ == "__main__":
    analyze_social_network()
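
For intuition, degree centrality can also be checked by hand: it is a node's degree divided by n - 1, so in this six-node graph Alice (degree 3) scores 3/5 = 0.6. The following is a minimal sketch that reproduces the degree centrality values using the graph builder above.

# Recompute degree centrality manually and compare with nx.degree_centrality.
G = create_social_network()
n = G.number_of_nodes()
manual_degree_centrality = {node: G.degree(node) / (n - 1) for node in G.nodes()}
print(manual_degree_centrality)
print(nx.degree_centrality(G))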
Output:
Source Code:

from textblob import TextBlob

def analyze_opinion(text):
    """
    Perform sentiment analysis on the given text.

    Parameters:
    text (str): The input text to analyze.

    Returns:
    sentiment (str): The sentiment of the text (Positive, Negative, or Neutral).
    polarity (float): The polarity score of the text (-1 to 1).
    subjectivity (float): The subjectivity score of the text (0 to 1).
    """
    # Create a TextBlob object
    blob = TextBlob(text)

    # Get the sentiment of the text
    polarity = blob.sentiment.polarity  # Polarity (-1 to 1): -1 = negative, 1 = positive
    subjectivity = blob.sentiment.subjectivity  # Subjectivity (0 to 1): 0 = objective, 1 = subjective

    # Determine the sentiment
    if polarity > 0:
        sentiment = "Positive"
    elif polarity < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    return sentiment, polarity, subjectivity

def analyze_opinions(text_list):
    """
    Analyze sentiment for a list of texts.

    Parameters:
    text_list (list of str): List of texts to analyze.

    Returns:
    None
    """
    for idx, text in enumerate(text_list):
        sentiment, polarity, subjectivity = analyze_opinion(text)
        print(f"\nText {idx+1}: {text}")
        print(f"  Sentiment: {sentiment}")
        print(f"  Polarity: {polarity:.4f}")
        print(f"  Subjectivity: {subjectivity:.4f}")

# Example usage
if __name__ == "__main__":
    # List of texts (opinions or reviews)
    texts = [
        "I absolutely love this product! It works wonders.",
        "The movie was okay, but I found it a bit too long.",
        "I'm very disappointed with the service. It was terrible.",
        "The weather is perfect today. I'm feeling great.",
        "The food was bland and tasteless. Not worth the money."
    ]

    # Perform sentiment analysis on the list of texts
    analyze_opinions(texts)
Output:
Source Code:

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the VADER lexicon (run this only once)
nltk.download('vader_lexicon')

def analyze_sentiment(text):
    """
    Analyze the sentiment of a given text using VADER.

    Parameters:
    text (str): The input text to analyze.

    Returns:
    dict: Sentiment scores (positive, neutral, negative, and compound).
    """
    # Initialize the VADER sentiment intensity analyzer
    sia = SentimentIntensityAnalyzer()

    # Get the sentiment scores
    sentiment_scores = sia.polarity_scores(text)

    return sentiment_scores

def analyze_texts(text_list):
    """
    Analyze sentiment for a list of texts.

    Parameters:
    text_list (list of str): List of texts to analyze.

    Returns:
    None
    """
    for idx, text in enumerate(text_list):
        scores = analyze_sentiment(text)
        print(f"\nText {idx+1}: {text}")
        print(f"  Positive Score: {scores['pos']}")
        print(f"  Neutral Score: {scores['neu']}")
        print(f"  Negative Score: {scores['neg']}")
        print(f"  Compound Score: {scores['compound']:.4f}")

        # Determine the overall sentiment based on the compound score
        if scores['compound'] >= 0.05:
            sentiment = "Positive"
        elif scores['compound'] <= -0.05:
            sentiment = "Negative"
        else:
            sentiment = "Neutral"

        print(f"  Overall Sentiment: {sentiment}")

# Example usage
if __name__ == "__main__":
    # List of sample texts (reviews, comments, or opinions)
    texts = [
        "I love this product! It works great.",
        "This movie was terrible. I wasted my time.",
        "The food was okay, but nothing special.",
        "What a fantastic day! I'm so happy right now.",
        "The service was really slow and disappointing."
    ]

    # Perform sentiment analysis on the list of texts
    analyze_texts(texts)
Output:
Source Code:

import requests
from bs4 import BeautifulSoup
import time
import random
import urllib.robotparser
import urllib.parse  # Needed for urllib.parse.urlparse below

def check_robots(url):
    """
    Check if the URL is allowed to be scraped based on robots.txt.

    Parameters:
    url (str): The website URL to check.

    Returns:
    bool: True if scraping is allowed, False otherwise.
    """
    parsed_url = urllib.parse.urlparse(url)
    robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(robots_url)
    rp.read()
    return rp.can_fetch("*", url)

def scrape_web_content(url):
    """
    Scrape web content while considering privacy regulations.

    Parameters:
    url (str): The website URL to scrape.

    Returns:
    str: The scraped content or a message indicating scraping is not allowed.
    """
    if not check_robots(url):
        return "Scraping is not allowed for this URL according to robots.txt."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract text content (for example, from paragraphs)
        text_content = ' '.join(p.get_text() for p in soup.find_all('p'))

        return text_content

    except requests.exceptions.RequestException as e:
        return f"An error occurred: {e}"

def main():
    # List of URLs to scrape
    urls = [
        "https://www.example.com",
        "https://www.wikipedia.org",
        # Add more URLs as needed
    ]

    for url in urls:
        print(f"Scraping content from: {url}")
        content = scrape_web_content(url)
        print(content[:500])  # Print the first 500 characters of the content
        print("\n" + "-" * 80 + "\n")

        # Respectful scraping: sleep between requests to avoid overwhelming the server
        time.sleep(random.uniform(1, 3))

if __name__ == "__main__":
    main()
Output:
Source Code:

import pandas as pd
import random
from datetime import datetime, timedelta

# Function to generate sample access logs
def generate_sample_logs(num_logs):
    """
    Generate a list of sample web access logs.

    Parameters:
    num_logs (int): Number of sample logs to generate.

    Returns:
    list: List of sample access log entries.
    """
    ip_addresses = [f"192.168.1.{i}" for i in range(1, 21)]  # Sample IP addresses
    paths = ["/home", "/about", "/contact", "/products", "/services"]
    logs = []

    for _ in range(num_logs):
        ip = random.choice(ip_addresses)
        # Random timestamp within the last hour
        timestamp = datetime.now() - timedelta(minutes=random.randint(1, 60))
        path = random.choice(paths)
        logs.append((ip, timestamp, f"GET {path} HTTP/1.1"))

    return logs

def create_log_dataframe(logs):
    """
    Create a DataFrame from the generated logs.

    Parameters:
    logs (list): List of log entries.

    Returns:
    pd.DataFrame: DataFrame containing the access logs.
    """
    return pd.DataFrame(logs, columns=['IP', 'Timestamp', 'Request'])

def extract_page_views(log_df):
    """
    Extract the most visited pages from the access logs.

    Parameters:
    log_df (pd.DataFrame): DataFrame containing the access logs.

    Returns:
    pd.DataFrame: DataFrame with page view counts.
    """
    log_df['URL'] = log_df['Request'].str.split(' ').str[1]  # Extract URL from the request
    page_views = log_df['URL'].value_counts().reset_index()
    page_views.columns = ['URL', 'Count']
    return page_views

def analyze_sessions(log_df):
    """
    Analyze user sessions based on IP address and timestamps.

    Parameters:
    log_df (pd.DataFrame): DataFrame containing the access logs.

    Returns:
    pd.DataFrame: DataFrame with session information.
    """
    session_df = log_df.sort_values(by=['IP', 'Timestamp'])
    # Identify sessions: a gap of more than 30 minutes starts a new session
    session_df['SessionID'] = (session_df['Timestamp'].diff() >
                               pd.Timedelta(minutes=30)).cumsum()
    session_count = session_df.groupby(['IP', 'SessionID']).size().reset_index(name='SessionCount')
    return session_count

def main():
    # Generate sample access logs
    num_logs = 100  # Number of sample logs to generate
    sample_logs = generate_sample_logs(num_logs)

    # Create a DataFrame from the generated logs
    log_df = create_log_dataframe(sample_logs)

    # Extract and display the most visited pages
    page_views = extract_page_views(log_df)
    print("Most Visited Pages:")
    print(page_views.head(10))  # Display the top 10 most visited pages
    print("\n" + "-" * 80 + "\n")

    # Analyze user sessions
    session_info = analyze_sessions(log_df)
    print("User Sessions:")
    print(session_info.head(10))  # Display the top 10 user sessions

if __name__ == "__main__":
    main()
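
The sessionization step above applies the 30-minute gap test across the whole sorted log; a variant that computes the gap per IP address, so that the time difference between two different visitors never starts a new session, is sketched below. This is a minimal sketch assuming the same DataFrame layout produced by create_log_dataframe (the name analyze_sessions_per_ip is illustrative).

def analyze_sessions_per_ip(log_df):
    """Per-IP sessionization: a gap of more than 30 minutes starts a new session."""
    df = log_df.sort_values(by=['IP', 'Timestamp']).copy()
    # Compute the time gap to the previous request from the same IP only.
    new_session = df.groupby('IP')['Timestamp'].diff() > pd.Timedelta(minutes=30)
    df['SessionID'] = new_session.groupby(df['IP']).cumsum()
    return df.groupby(['IP', 'SessionID']).size().reset_index(name='SessionCount')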
Output:
Source Code:

import pandas as pd
import numpy as np

# Sample user-item rating data
data = {
    'User': ['Alice', 'Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Charlie', 'Charlie'],
    'Item': ['Item1', 'Item2', 'Item3', 'Item1', 'Item2', 'Item1', 'Item2', 'Item3', 'Item4'],
    'Rating': [5, 3, 4, 4, 5, 2, 5, 1, 4]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Create a user-item matrix
user_item_matrix = df.pivot_table(index='User', columns='Item', values='Rating').fillna(0)

# Calculate similarity using cosine similarity
def cosine_similarity(matrix):
    """Compute cosine similarity between users."""
    similarity = np.dot(matrix, matrix.T)
    norms = np.array([np.sqrt(np.diagonal(similarity))])
    return similarity / norms / norms.T

# Get item recommendations for a user
def recommend_items(user, user_item_matrix, num_recommendations=2):
    """Recommend items for a specified user."""
    # Get the user-item matrix as a NumPy array
    matrix = user_item_matrix.values

    # Calculate similarity
    similarity = cosine_similarity(matrix)

    # Get the user's index
    user_idx = user_item_matrix.index.get_loc(user)

    # Get the scores for all items based on similarity
    user_scores = (similarity[user_idx].dot(matrix) /
                   np.array([np.abs(similarity[user_idx]).sum()]))

    # Create a DataFrame for recommended items
    recommended_items = pd.DataFrame(user_scores, index=user_item_matrix.columns,
                                     columns=['Score'])
    # Keep only items the user has not rated yet
    recommended_items = recommended_items[
        recommended_items.index.isin(user_item_matrix.columns)
        & (user_item_matrix.loc[user] == 0)]

    # Return the top recommended items
    return recommended_items.sort_values(by='Score', ascending=False).head(num_recommendations)

def main():
    user = 'Alice'  # Specify the user for recommendations
    recommendations = recommend_items(user, user_item_matrix)

    print(f"Recommendations for {user}:")
    print(recommendations)

if __name__ == "__main__":
    main()
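
As an optional cross-check, scikit-learn's pairwise cosine_similarity should reproduce the same user-user similarity matrix as the hand-rolled function above; the following is a minimal sketch, assuming scikit-learn is installed (the import is aliased to avoid clashing with the local function name).

from sklearn.metrics.pairwise import cosine_similarity as sk_cosine_similarity

# Compare the library result with the manual implementation on the same matrix.
print(sk_cosine_similarity(user_item_matrix.values))
print(cosine_similarity(user_item_matrix.values))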
Output:
Source Code:

import networkx as nx
import matplotlib.pyplot as plt

# Sample web structure data (web pages and their links)
web_data = {
    'PageA': ['PageB', 'PageC'],
    'PageB': ['PageC', 'PageD'],
    'PageC': ['PageD'],
    'PageD': ['PageA'],
    'PageE': ['PageD', 'PageA'],
}

def create_web_graph(data):
    """Create a directed graph from web data."""
    G = nx.DiGraph()  # Create a directed graph
    for page, links in data.items():
        for link in links:
            G.add_edge(page, link)  # Add edges for links
    return G

def analyze_graph(G):
    """Analyze the web graph to find the most linked pages."""
    # Calculate in-degree (number of incoming links)
    in_degrees = G.in_degree()
    sorted_pages = sorted(in_degrees, key=lambda x: x[1], reverse=True)

    return sorted_pages

def plot_graph(G):
    """Visualize the web structure graph."""
    plt.figure(figsize=(8, 6))
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=2000,
            font_size=10, font_weight='bold', arrows=True)
    plt.title("Web Structure Graph")
    plt.show()

def main():
    # Create the web graph from the sample data
    web_graph = create_web_graph(web_data)

    # Analyze the graph to find the most linked pages
    most_linked_pages = analyze_graph(web_graph)

    print("Most Linked Pages (In-Degree):")
    for page, count in most_linked_pages:
        print(f"{page}: {count} incoming links")

    # Plot the web structure graph
    plot_graph(web_graph)

if __name__ == "__main__":
    main()
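
In-degree counting only measures direct popularity; the same directed graph can also be ranked with PageRank, as used in the earlier programs. The following is a minimal sketch reusing create_web_graph and web_data from above.

# Rank the pages by PageRank and print them in descending order of score.
G = create_web_graph(web_data)
for page, rank in sorted(nx.pagerank(G).items(), key=lambda x: x[1], reverse=True):
    print(f"{page}: {rank:.4f}")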
Output:
BEYOND CURRICULUM
Source Code:

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample dataset of movies with descriptions
data = {
    'Movie': ['The Matrix', 'Titanic', 'Interstellar', 'John Wick', 'When Harry Met Sally'],
    'Description': [
        'A thrilling action movie set in a dystopian future where a hacker discovers the reality is simulated by machines.',
        'A romantic disaster movie about a young couple who fall in love aboard the doomed Titanic ship.',
        'A science fiction epic set in space, where a group of astronauts venture into a wormhole in search of a new home for humanity.',
        'An intense action movie where an ex-hitman seeks revenge on those who wronged him, with plenty of fight scenes.',
        'A classic romantic comedy about two friends who navigate love and friendship, filled with humorous moments and heartfelt emotions.'
    ]
}

# Convert data into a DataFrame
df = pd.DataFrame(data)

# Use TF-IDF Vectorizer to convert descriptions into numerical form
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Description'])

# Compute cosine similarity between movies based on descriptions
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to recommend similar movies
def get_recommendations(movie_name, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = df[df['Movie'] == movie_name].index[0]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the most similar movies (ignoring the first one, as it's the same movie)
    sim_scores = sim_scores[1:4]  # Get top 3 recommendations

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the top 3 most similar movies
    return df['Movie'].iloc[movie_indices]

def main():
    # Example: Recommend movies similar to 'The Matrix'
    movie_name = 'The Matrix'
    recommendations = get_recommendations(movie_name)

    print(f"Movies similar to '{movie_name}':")
    for i, rec in enumerate(recommendations, 1):
        print(f"{i}. {rec}")

if __name__ == "__main__":
    main()
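
To see which terms drive the similarity scores, the fitted vectorizer's vocabulary can be inspected; the following is a minimal sketch, assuming scikit-learn 1.0 or newer (for get_feature_names_out) and reusing tfidf_vectorizer, tfidf_matrix, and df from above.

import numpy as np

# Show the TF-IDF terms with the highest weight in the description of 'The Matrix'.
feature_names = tfidf_vectorizer.get_feature_names_out()
movie_idx = df[df['Movie'] == 'The Matrix'].index[0]
row = tfidf_matrix[movie_idx].toarray().ravel()
top = np.argsort(row)[::-1][:5]
print([(feature_names[i], round(row[i], 3)) for i in top])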
Output:
Source Code:

import networkx as nx
import matplotlib.pyplot as plt

# Simulate initial set of web pages and links (directed graph)
def create_initial_web_graph():
    G = nx.DiGraph()

    # Add initial nodes (web pages) and edges (links)
    G.add_edges_from([
        ('PageA', 'PageB'),
        ('PageA', 'PageC'),
        ('PageB', 'PageC'),
        ('PageC', 'PageA'),
        ('PageD', 'PageC')
    ])

    return G

# Function to display the PageRank of web pages
def display_pagerank(G, time_step):
    pagerank = nx.pagerank(G)
    print(f"\nPageRank at Time Step {time_step}:")
    for page, rank in pagerank.items():
        print(f"{page}: {rank:.4f}")

    return pagerank

# Function to plot the graph (optional, for visualization)
def plot_graph(G, time_step):
    pos = nx.spring_layout(G)
    plt.figure(figsize=(8, 6))
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=3000,
            edge_color='gray', font_size=12, font_weight='bold')
    plt.title(f"Web Link Structure at Time Step {time_step}")
    plt.show()

# Simulate dynamic changes in the web link structure over time
def simulate_dynamic_changes(G):
    # Display initial PageRank
    display_pagerank(G, time_step=0)
    plot_graph(G, time_step=0)

    # Time Step 1: Add a new page and links
    G.add_edges_from([
        ('PageE', 'PageA'),
        ('PageE', 'PageD'),
        ('PageB', 'PageE')
    ])
    display_pagerank(G, time_step=1)
    plot_graph(G, time_step=1)

    # Time Step 2: Remove some links and add new ones
    G.remove_edge('PageA', 'PageC')
    G.add_edges_from([
        ('PageC', 'PageD'),
        ('PageD', 'PageE')
    ])
    display_pagerank(G, time_step=2)
    plot_graph(G, time_step=2)

    # Time Step 3: Further changes in the web structure
    G.remove_edge('PageD', 'PageC')
    G.add_edges_from([
        ('PageE', 'PageB')
    ])
    display_pagerank(G, time_step=3)
    plot_graph(G, time_step=3)

def main():
    # Create the initial web graph
    web_graph = create_initial_web_graph()

    # Simulate dynamic changes in web link structure and analyze PageRank
    simulate_dynamic_changes(web_graph)

if __name__ == "__main__":
    main()
