
GEN AI LAB PROGRAMS

1. Computing the TF-IDF Matrix using NumPy

Task: Write a Python function to compute the TF-IDF matrix for the given set of
documents using only NumPy.
Code:

import numpy as np

def compute_tf_idf(documents, vocabulary):
    N = len(documents)
    V = len(vocabulary)

    # Initialize TF matrix (N x V)
    tf = np.zeros((N, V))

    # Build term frequency matrix
    for i, doc in enumerate(documents):
        words = doc.lower().split()
        for word in words:
            if word in vocabulary:
                j = vocabulary.index(word)
                tf[i, j] += 1
        tf[i] = tf[i] / len(words)  # Normalize TF by document length

    # Compute Document Frequency (DF)
    df = np.zeros(V)
    for j, term in enumerate(vocabulary):
        df[j] = sum(1 for doc in documents if term in doc.lower().split())

    # Compute Inverse Document Frequency (IDF)
    idf = np.log(N / (df + 1))  # Add 1 to avoid division by zero

    # Compute TF-IDF matrix
    tf_idf = tf * idf  # Element-wise multiplication

    return tf_idf

# Example usage:
documents = [
    "cat sat on the mat",
    "dog sat on the log",
    "cat and dog played together"
]

vocabulary = list(set(" ".join(documents).lower().split()))

tf_idf_matrix = compute_tf_idf(documents, vocabulary)

print("Vocabulary:", vocabulary)
print("TF-IDF Matrix:\n", tf_idf_matrix)
Output:
Vocabulary: ['mat', 'together', 'sat', 'dog', 'cat', 'the', 'played', 'on', 'log', 'and']
TF-IDF Matrix:
[[0.08109302 0. 0. 0. 0. 0.
0. 0. 0. 0. ]
[0. 0. 0. 0. 0. 0.
0. 0. 0.08109302 0. ]
[0. 0.08109302 0. 0. 0. 0.
0.08109302 0. 0. 0.08109302]]
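
As a quick sanity check, one nonzero entry can be reproduced by hand: 'mat' occurs once among the 5 words of "cat sat on the mat", so TF = 1/5 = 0.2, and it appears in 1 of the 3 documents, so IDF = ln(3 / (1 + 1)) ≈ 0.405 with the +1 smoothing used above, giving 0.2 × 0.405 ≈ 0.08109302, which matches the printed matrix. The same check in code (a small sketch, using nothing beyond NumPy):

import numpy as np
tf = 1 / 5                 # 'mat' occurs once in a 5-word document
idf = np.log(3 / (1 + 1))  # N = 3 documents, df = 1, with the +1 smoothing from compute_tf_idf
print(round(tf * idf, 8))  # 0.08109302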

2. Generating n-grams for a Sentence


Task: Write a Python function to generate n-grams for a given sentence.

Code:
def generate_ngrams(sentence, n):
    words = sentence.lower().split()
    ngrams = []
    for i in range(len(words) - n + 1):
        ngram = tuple(words[i:i + n])
        ngrams.append(ngram)
    return ngrams

# Example usage:
sentence = "The quick brown fox jumps over the lazy dog."
n = 3
ngrams = generate_ngrams(sentence, n)
print(f"{n}-grams:")
for gram in ngrams:
    print(gram)

Output:
3-grams:
('the', 'quick', 'brown')
('quick', 'brown', 'fox')
('brown', 'fox', 'jumps')
('fox', 'jumps', 'over')
('jumps', 'over', 'the')
('over', 'the', 'lazy')
('the', 'lazy', 'dog.')
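
Note that the final trigram contains 'dog.' with the trailing period, because split() performs no punctuation handling. If punctuation-free tokens are preferred, one optional variant (not required by the task) strips punctuation before splitting; generate_ngrams_clean below is a hypothetical helper illustrating this:

import string

def generate_ngrams_clean(sentence, n):
    # Optional variant: remove punctuation before tokenizing
    cleaned = sentence.lower().translate(str.maketrans("", "", string.punctuation))
    words = cleaned.split()
    return [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]

print(generate_ngrams_clean("The quick brown fox jumps over the lazy dog.", 3)[-1])
# ('the', 'lazy', 'dog')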

3. Computing a 3-gram Language Model

Task: Write a Python function to compute a 3-gram language model.

Code:
def compute_trigram_language_model(documents):
    from collections import defaultdict

    trigram_counts = defaultdict(int)
    total_trigrams = 0

    for doc in documents:
        words = doc.lower().split()
        for i in range(len(words) - 2):
            trigram = tuple(words[i:i + 3])
            trigram_counts[trigram] += 1
            total_trigrams += 1

    # Compute probabilities
    trigram_probabilities = {}
    for trigram, count in trigram_counts.items():
        trigram_probabilities[trigram] = count / total_trigrams

    return trigram_probabilities

# Example usage:
documents = [
    "The quick brown fox jumps over the lazy dog",
    "The quick blue fox jumps over the lazy cat",
    "The lazy dog sleeps under the blue sky"
]

trigram_model = compute_trigram_language_model(documents)

print("Trigram Probabilities:")
for trigram, prob in trigram_model.items():
    print(f"{trigram}: {prob}")

Output:
Trigram Probabilities:
('the', 'quick', 'brown'): 0.05
('quick', 'brown', 'fox'): 0.05
('brown', 'fox', 'jumps'): 0.05
('fox', 'jumps', 'over'): 0.1
('jumps', 'over', 'the'): 0.1
('over', 'the', 'lazy'): 0.1
('the', 'lazy', 'dog'): 0.1
('the', 'quick', 'blue'): 0.05
('quick', 'blue', 'fox'): 0.05
('blue', 'fox', 'jumps'): 0.05
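
The probabilities above are joint trigram frequencies, i.e. each count divided by the total number of trigrams in the corpus. If conditional probabilities P(w3 | w1, w2) are wanted instead, a minimal sketch (conditional_trigram_probs is a hypothetical extension, reusing the documents list above) could look like this:

from collections import defaultdict

def conditional_trigram_probs(documents):
    # Estimate P(w3 | w1, w2) = count(w1, w2, w3) / count(w1, w2),
    # counting only bigrams that start a trigram so the ratio is well defined
    trigram_counts = defaultdict(int)
    context_counts = defaultdict(int)
    for doc in documents:
        words = doc.lower().split()
        for i in range(len(words) - 2):
            trigram_counts[tuple(words[i:i + 3])] += 1
            context_counts[tuple(words[i:i + 2])] += 1
    return {t: c / context_counts[t[:2]] for t, c in trigram_counts.items()}

print(conditional_trigram_probs(documents)[('the', 'quick', 'brown')])
# 0.5, since 'the quick' is followed by 'brown' once and by 'blue' once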

4. Creating a Word Embedding Matrix

Task:

1. Implement the function create_embedding_matrix(corpus, embedding_dim).

2. Test the function and get_word_vector with the given corpus and embedding_dim=3.

Code:
import numpy as np

def create_embedding_matrix(corpus, embedding_dim):
    # Preprocessing
    vocabulary = {}
    index = 0
    for sentence in corpus:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1

    V = len(vocabulary)
    # Initialize embedding matrix with random values between 0 and 1
    E = np.random.rand(V, embedding_dim)

    # Create word to index mapping (already done in vocabulary)
    word_to_index = vocabulary

    # Define get_word_vector function
    def get_word_vector(word):
        word = word.lower()
        if word in word_to_index:
            idx = word_to_index[word]
            return E[idx]
        else:
            return np.zeros(embedding_dim)

    return E, vocabulary, get_word_vector

# Example usage:
corpus = [
    "I love machine learning",
    "Machine learning is amazing",
    "I love learning new things"
]
embedding_dim = 3
E, vocabulary, get_word_vector = create_embedding_matrix(corpus, embedding_dim)

print("Vocabulary:", vocabulary)
print("Embedding Matrix E:\n", E)

# Test get_word_vector
word = "learning"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)

# Test with a word not in the vocabulary
word = "unknown"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)

Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
Embedding Matrix E:
[[0.70366694 0.37323165 0.8339942 ]
[0.30824863 0.25459773 0.29978671]
[0.17141767 0.55727104 0.19208332]
[0.36011277 0.62322428 0.86099527]
[0.1327652 0.03365305 0.35291037]
[0.80062233 0.84881622 0.73158583]
[0.70957902 0.75419446 0.53513209]
[0.78353907 0.28600711 0.20810742]]
Embedding for 'learning': [0.36011277 0.62322428 0.86099527]
Embedding for 'unknown': [0. 0. 0.]
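
Once an embedding matrix exists, a common follow-up is to compare two word vectors with cosine similarity. The sketch below reuses the get_word_vector returned above; cosine_similarity is a hypothetical helper, and with randomly initialized embeddings the resulting values carry no semantic meaning.

import numpy as np

def cosine_similarity(u, v):
    # Cosine similarity = (u . v) / (||u|| * ||v||); returns 0.0 for zero vectors
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / denom) if denom else 0.0

print(cosine_similarity(get_word_vector("machine"), get_word_vector("learning")))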

5. Creating a Word Embedding Matrix with Pre-trained Embeddings

Task:

1. Implement the function create_embedding_matrix_with_pretrained(corpus, pretrained_embeddings, embedding_dim).

2. Test the function with the given corpus and pre-trained embeddings.

Code:

import numpy as np

def create_embedding_matrix_with_pretrained(corpus, pretrained_embeddings, embedding_dim):
    # Preprocessing
    vocabulary = {}
    index = 0
    for sentence in corpus:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1

    V = len(vocabulary)
    # Initialize embedding matrix
    E = np.zeros((V, embedding_dim))

    # Assign embeddings
    for word, idx in vocabulary.items():
        if word in pretrained_embeddings:
            E[idx] = np.array(pretrained_embeddings[word])
        else:
            E[idx] = np.random.rand(embedding_dim)  # Random initialization

    # Define get_word_vector function
    def get_word_vector(word):
        word = word.lower()
        if word in vocabulary:
            idx = vocabulary[word]
            return E[idx]
        else:
            return np.zeros(embedding_dim)

    return E, vocabulary, get_word_vector

# Example usage:
corpus = [
    "I love machine learning",
    "Machine learning is amazing",
    "I love learning new things"
]

pretrained_embeddings = {
    "machine": [0.1, 0.2, 0.3],
    "learning": [0.2, 0.3, 0.4],
    "amazing": [0.3, 0.4, 0.5],
    "love": [0.4, 0.5, 0.6]
}

embedding_dim = 3

E, vocabulary, get_word_vector = create_embedding_matrix_with_pretrained(
    corpus, pretrained_embeddings, embedding_dim)

print("Vocabulary:", vocabulary)
print("Embedding Matrix E:\n", E)

# Test get_word_vector
word = "machine"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)

word = "i"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)  # Randomly initialized

word = "unknown"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)  # Returns zeros

Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
Embedding Matrix E:
[[0.20433889 0.45932819 0.62836074]
[0.4 0.5 0.6 ]
[0.1 0.2 0.3 ]
[0.2 0.3 0.4 ]
[0.84748087 0.37440758 0.93981111]
[0.3 0.4 0.5 ]
[0.40474447 0.82834371 0.13308173]
[0.44601989 0.85308688 0.05198728]]
Embedding for 'machine': [0.1 0.2 0.3]
Embedding for 'i': [0.20433889 0.45932819 0.62836074]
Embedding for 'unknown': [0. 0. 0.]
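
In practice, pretrained_embeddings is usually loaded from a file rather than hard-coded. A minimal sketch for reading a GloVe-style text file (each line holds a word followed by its vector components, whitespace-separated) is shown below; load_pretrained_embeddings and the file path are illustrative assumptions, not part of the task.

def load_pretrained_embeddings(path):
    # Each line: "<word> v1 v2 ... vd"
    embeddings = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split()
            embeddings[parts[0]] = [float(v) for v in parts[1:]]
    return embeddings

# pretrained = load_pretrained_embeddings("glove.6B.50d.txt")  # placeholder path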

6. Generating One-Hot Encodings

Task:

1. Implement the function create_one_hot_encodings(corpus).

2. Test the function with the given corpus.

Code:
import numpy as np

def create_one_hot_encodings(corpus):
    # Preprocessing
    vocabulary = {}
    index = 0
    for sentence in corpus:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1

    V = len(vocabulary)
    # Initialize one-hot encoding matrix
    one_hot_encodings = {}

    for word, idx in vocabulary.items():
        one_hot_vector = np.zeros(V)
        one_hot_vector[idx] = 1
        one_hot_encodings[word] = one_hot_vector

    return vocabulary, one_hot_encodings

# Example usage:
corpus = [
    "I love machine learning",
    "Machine learning is amazing",
    "I love learning new things"
]

vocabulary, one_hot_encodings = create_one_hot_encodings(corpus)

print("Vocabulary:", vocabulary)
print("\nOne-Hot Encodings:")
for word, one_hot_vector in one_hot_encodings.items():
    print(f"Word: '{word}' - One-Hot Vector: {one_hot_vector}")
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}

One-Hot Encodings:
Word: 'i' - One-Hot Vector: [1. 0. 0. 0. 0. 0. 0. 0.]
Word: 'love' - One-Hot Vector: [0. 1. 0. 0. 0. 0. 0. 0.]
Word: 'machine' - One-Hot Vector: [0. 0. 1. 0. 0. 0. 0. 0.]
Word: 'learning' - One-Hot Vector: [0. 0. 0. 1. 0. 0. 0. 0.]
Word: 'is' - One-Hot Vector: [0. 0. 0. 0. 1. 0. 0. 0.]
Word: 'amazing' - One-Hot Vector: [0. 0. 0. 0. 0. 1. 0. 0.]
Word: 'new' - One-Hot Vector: [0. 0. 0. 0. 0. 0. 1. 0.]
Word: 'things' - One-Hot Vector: [0. 0. 0. 0. 0. 0. 0. 1.]
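
One-hot vectors connect directly to the embedding matrix from program 4: multiplying a one-hot row vector by E simply selects the corresponding row of E. A quick check, assuming E and the matching vocabulary from program 4 are still defined in the same session (both are built from the same corpus in the same order):

import numpy as np

one_hot = one_hot_encodings["learning"]                  # shape (V,)
selected = one_hot @ E                                   # picks out the row of E for 'learning'
print(np.allclose(selected, E[vocabulary["learning"]]))  # True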

7. Implementing the Skip-Gram Model

Task:

1. Implement the function generate_skip_gram_pairs(sentences, window_size).

2. Test it with the given sentences and window_size = 2.

Code:

def generate_skip_gram_pairs(sentences, window_size):
    # Preprocessing: Build the vocabulary and word indices
    vocabulary = {}
    index = 0
    for sentence in sentences:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1

    # Generate skip-gram training pairs
    training_pairs = []
    for sentence in sentences:
        words = sentence.lower().split()
        for i, target_word in enumerate(words):
            # Define the context window
            start = max(0, i - window_size)
            end = min(len(words), i + window_size + 1)
            for j in range(start, end):
                if i != j:
                    context_word = words[j]
                    training_pairs.append((target_word, context_word))
    return vocabulary, training_pairs

# Example usage:
sentences = [
    "I love machine learning",
    "Machine learning is amazing",
    "I love learning new things"
]

window_size = 2

vocabulary, training_pairs = generate_skip_gram_pairs(sentences, window_size)

print("Vocabulary:", vocabulary)
print("\nSkip-Gram Training Pairs:")
for pair in training_pairs:
    print(pair)

Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}

Skip-Gram Training Pairs:
('i', 'love')
('i', 'machine')
('love', 'i')
('love', 'machine')
('love', 'learning')
('machine', 'i')
('machine', 'love')
('machine', 'learning')
('learning', 'love')
('learning', 'machine')
('machine', 'learning')
('machine', 'is')
('learning', 'machine')
('learning', 'is')
('learning', 'amazing')
('is', 'machine')
('is', 'learning')
('is', 'amazing')
('amazing', 'learning')
('amazing', 'is')
('i', 'love')
('i', 'learning')
('love', 'i')
('love', 'learning')
('love', 'new')
('learning', 'i')
('learning', 'love')
('learning', 'new')
('learning', 'things')
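
Before training a skip-gram model, these word pairs are normally converted to integer indices using the vocabulary. A one-line sketch, reusing vocabulary and training_pairs from above:

indexed_pairs = [(vocabulary[target], vocabulary[context]) for target, context in training_pairs]
print(indexed_pairs[:3])  # [(0, 1), (0, 2), (1, 0)]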

8. Generating CBOW Training Pairs

Task:

1. Implement the function generate_cbow_pairs(sentences, window_size).

2. Test it with the given sentences and window_size = 2.

Code:

def generate_cbow_pairs(sentences, window_size):
    # Preprocessing: Build the vocabulary and word indices
    vocabulary = {}
    index = 0
    for sentence in sentences:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1

    # Generate CBOW training pairs
    training_pairs = []
    for sentence in sentences:
        words = sentence.lower().split()
        for i, target_word in enumerate(words):
            # Define the context window
            start = max(0, i - window_size)
            end = min(len(words), i + window_size + 1)
            context_words = []
            for j in range(start, end):
                if i != j:
                    context_words.append(words[j])
            if context_words:
                training_pairs.append((tuple(context_words), target_word))

    return vocabulary, training_pairs

# Example usage:
sentences = [
    "I love machine learning",
    "Machine learning is amazing",
    "I love learning new things"
]

window_size = 2

vocabulary, training_pairs = generate_cbow_pairs(sentences, window_size)

print("Vocabulary:", vocabulary)
print("\nCBOW Training Pairs:")
for pair in training_pairs:
    print(f"Context: {pair[0]}, Target: {pair[1]}")

Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}

CBOW Training Pairs:
Context: ('love', 'machine'), Target: i
Context: ('i', 'machine', 'learning'), Target: love
Context: ('i', 'love', 'learning'), Target: machine
Context: ('love', 'machine'), Target: learning
Context: ('learning', 'is'), Target: machine
Context: ('machine', 'is', 'amazing'), Target: learning
Context: ('machine', 'learning', 'amazing'), Target: is
Context: ('learning', 'is'), Target: amazing
Context: ('love', 'learning'), Target: i
Context: ('i', 'learning', 'new'), Target: love
Context: ('i', 'love', 'new', 'things'), Target: learning
Context: ('love', 'learning', 'things'), Target: new
Context: ('learning', 'new'), Target: things
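
In the CBOW setup, the context words are usually combined into a single input vector, most commonly by averaging their embeddings, before predicting the target. A minimal sketch, assuming the embedding matrix E from program 4 (or 5) is still defined in the session alongside the training_pairs above:

import numpy as np

context, target = training_pairs[0]                       # (('love', 'machine'), 'i')
context_vectors = np.array([E[vocabulary[w]] for w in context])
context_mean = context_vectors.mean(axis=0)               # averaged context embedding
print(context_mean.shape)                                 # (3,)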

9. Implementing a Simple Vanilla RNN

Task:

1. Implement the function rnn_forward(x, Wxh, Whh, Why, bh, by, h0).

2. Test the function with random weights, biases, and an initial hidden state.

Code:

import numpy as np

def rnn_forward(x, Wxh, Whh, Why, bh, by, h0):
    h = h0
    hs = []
    ys = []
    for t in range(len(x)):
        xt = np.array([[x[t]]])  # Input at time t (make it a column vector)
        h = np.tanh(np.dot(Whh, h) + np.dot(Wxh, xt) + bh)  # Hidden state
        y = np.dot(Why, h) + by  # Output
        hs.append(h)
        ys.append(y)
    return ys, hs

# Example usage:
# Input sequence
x = [1, 2, 3]

# Hyperparameters
input_size = 1   # Since x is a sequence of numbers
hidden_size = 4  # You can choose any size for the hidden state
output_size = 1  # Output is a single number at each time step

# Random initialization of weights and biases
np.random.seed(0)  # For reproducibility
Wxh = np.random.randn(hidden_size, input_size) * 0.01
Whh = np.random.randn(hidden_size, hidden_size) * 0.01
Why = np.random.randn(output_size, hidden_size) * 0.01
bh = np.zeros((hidden_size, 1))
by = np.zeros((output_size, 1))
h0 = np.zeros((hidden_size, 1))

# Run the RNN forward function
ys, hs = rnn_forward(x, Wxh, Whh, Why, bh, by, h0)

print("Outputs at each time step:")
for t, y in enumerate(ys):
    print(f"Time step {t+1}: y = {y.flatten()}")

Output:
Outputs at each time step:
Time step 1: y = [-0.00050584]
Time step 2: y = [-0.00101643]
Time step 3: y = [-0.00152624]
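
Each step of rnn_forward applies the standard vanilla-RNN recurrence: the hidden state is h_t = tanh(Whh·h_{t-1} + Wxh·x_t + bh) and the output is y_t = Why·h_t + by. A quick shape check on the returned lists (assuming the example above has just been run):

print(len(ys), len(hs))          # 3 3  (one entry per time step)
print(hs[0].shape, ys[0].shape)  # (4, 1) (1, 1)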

10. Implementing the Self-Attention Mechanism using only NumPy
Code:
import numpy as np

def softmax(x, axis=-1):
    """Compute the softmax of each element along the specified axis of x."""
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))  # For numerical stability
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

def self_attention(X, Wq, Wk, Wv):
    """
    Implement the self-attention mechanism.

    Args:
        X: Input matrix of shape (n, d), where n is the number of input vectors,
           and d is the dimension of each vector.
        Wq: Query weight matrix of shape (d, dout).
        Wk: Key weight matrix of shape (d, dout).
        Wv: Value weight matrix of shape (d, dout).

    Returns:
        Output matrix of shape (n, dout).
    """
    # Compute Queries (Q), Keys (K), and Values (V)
    Q = np.dot(X, Wq)  # Shape: (n, dout)
    K = np.dot(X, Wk)  # Shape: (n, dout)
    V = np.dot(X, Wv)  # Shape: (n, dout)

    # Compute attention scores: Q * K.T, then scale by sqrt(dout)
    d_k = Q.shape[1]  # dout
    attention_scores = np.dot(Q, K.T) / np.sqrt(d_k)  # Shape: (n, n)

    # Apply softmax to attention scores
    attention_weights = softmax(attention_scores, axis=-1)  # Shape: (n, n)

    # Compute final output: Attention weights * V
    output = np.dot(attention_weights, V)  # Shape: (n, dout)

    return output

# Example usage:
np.random.seed(0)  # For reproducibility

# Input matrix X (n=4 vectors, d=3 features per vector)
X = np.random.rand(4, 3)  # Shape: (4, 3)

# Learnable weight matrices Wq, Wk, Wv
d = 3     # Input dimension
dout = 2  # Output dimension
Wq = np.random.rand(d, dout)  # Shape: (3, 2)
Wk = np.random.rand(d, dout)  # Shape: (3, 2)
Wv = np.random.rand(d, dout)  # Shape: (3, 2)

# Call the self_attention function
output = self_attention(X, Wq, Wk, Wv)

print("Input Matrix X:")
print(X)
print("\nWeight Matrix Wq:")
print(Wq)
print("\nWeight Matrix Wk:")
print(Wk)
print("\nWeight Matrix Wv:")
print(Wv)
print("\nSelf-Attention Output:")
print(output)

Output:
Input Matrix X:
[[0.5488135 0.71518937 0.60276338]
[0.54488318 0.4236548 0.64589411]
[0.43758721 0.891773 0.96366276]
[0.38344152 0.79172504 0.52889492]]

Weight Matrix Wq:
[[0.56804456 0.92559664]
[0.07103606 0.0871293 ]
[0.0202184 0.83261985]]

Weight Matrix Wk:
[[0.77815675 0.87001215]
[0.97861834 0.79915856]
[0.46147936 0.78052918]]

Weight Matrix Wv:
[[0.11827443 0.63992102]
[0.14335329 0.94466892]
[0.52184832 0.41466194]]

Self-Attention Output:
[[0.53569849 1.29450415]
[0.53551973 1.29413435]
[0.53849796 1.29925955]
[0.53131543 1.28657939]]
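
The function computes standard scaled dot-product attention, softmax(Q·K^T / sqrt(d_k))·V. One property worth verifying is that each row of the attention-weight matrix sums to 1, since softmax is applied row-wise; the check below reuses X, Wq, Wk and the softmax helper defined above:

Q, K = X @ Wq, X @ Wk
weights = softmax(Q @ K.T / np.sqrt(Q.shape[1]), axis=-1)
print(weights.sum(axis=1))  # ≈ [1. 1. 1. 1.]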
