Gen AI Lab Programs
1: Computing the TF-IDF Matrix
Task: Write a Python function to compute the TF-IDF matrix for the given set of documents using only NumPy.
Code:
import numpy as np
def compute_tf_idf(documents, vocabulary):
    N = len(documents)
    V = len(vocabulary)
    # Term-frequency matrix (N x V): raw counts normalized by document length
    tf = np.zeros((N, V))
    for i, doc in enumerate(documents):
        words = doc.lower().split()
        for word in words:
            tf[i, vocabulary.index(word)] += 1
        tf[i] /= len(words)
    # Smoothed inverse document frequency: idf = log(N / (1 + df))
    tf_idf = tf * np.log(N / (1 + np.count_nonzero(tf, axis=0)))
    return tf_idf
# Example usage:
documents = [
"cat sat on the mat",
"dog sat on the log",
"cat and dog played together"
]
# Build the vocabulary from the unique words across all documents
vocabulary = list({word for doc in documents for word in doc.lower().split()})
tf_idf_matrix = compute_tf_idf(documents, vocabulary)
print("Vocabulary:", vocabulary)
print("TF-IDF Matrix:\n", tf_idf_matrix)
Output:
Vocabulary: ['mat', 'together', 'sat', 'dog', 'cat', 'the', 'played', 'on', 'log', 'and']
TF-IDF Matrix:
[[0.08109302 0.         0.         0.         0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.         0.         0.         0.08109302 0.        ]
 [0.         0.08109302 0.         0.         0.         0.         0.08109302 0.         0.         0.08109302]]
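As a quick check of the nonzero entries (assuming the smoothed idf = log(N / (1 + df)) used above): a word such as 'mat' occurs once in a five-word document, so tf = 1/5 = 0.2; it appears in df = 1 of the N = 3 documents, so idf = ln(3 / 2) ≈ 0.4055; and tf-idf ≈ 0.2 × 0.4055 ≈ 0.0811, which matches the matrix. Words appearing in two documents get idf = ln(3 / 3) = 0, hence the zeros.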
2: Generating N-grams from a Sentence
Task: Write a Python function to generate the n-grams of a given sentence.
Code:
def generate_ngrams(sentence, n):
    words = sentence.lower().split()
    ngrams = []
    for i in range(len(words) - n + 1):
        ngram = tuple(words[i:i + n])
        ngrams.append(ngram)
    return ngrams
# Example usage:
sentence = "The quick brown fox jumps over the lazy dog."
n = 3
ngrams = generate_ngrams(sentence, n)
print(f"{n}-grams:")
for gram in ngrams:
    print(gram)
Output:
3-grams:
('the', 'quick', 'brown')
('quick', 'brown', 'fox')
('brown', 'fox', 'jumps')
('fox', 'jumps', 'over')
('jumps', 'over', 'the')
('over', 'the', 'lazy')
('the', 'lazy', 'dog.')
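Note that the last trigram keeps 'dog.' with its trailing period, because split() does not strip punctuation. A minimal variant that removes punctuation before tokenizing (a sketch, not part of the original listing; the name generate_ngrams_clean is hypothetical):
import string

def generate_ngrams_clean(sentence, n):
    # Strip punctuation first, then apply the same sliding window
    cleaned = sentence.lower().translate(str.maketrans("", "", string.punctuation))
    words = cleaned.split()
    return [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]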
3: Computing a 3-gram Language Model
Code:
from collections import defaultdict

def compute_trigram_language_model(documents):
    trigram_counts = defaultdict(int)
    total_trigrams = 0
    # Count every trigram across all documents
    for doc in documents:
        words = doc.lower().split()
        for i in range(len(words) - 2):
            trigram_counts[tuple(words[i:i + 3])] += 1
            total_trigrams += 1
    # Convert counts to probabilities
    trigram_probabilities = {}
    for trigram, count in trigram_counts.items():
        trigram_probabilities[trigram] = count / total_trigrams
    return trigram_probabilities
# Example usage:
documents = [
"The quick brown fox jumps over the lazy dog",
"The quick blue fox jumps over the lazy cat",
"The lazy dog sleeps under the blue sky"
]
trigram_model = compute_trigram_language_model(documents)
print("Trigram Probabilities:")
for trigram, prob in trigram_model.items():
    print(f"{trigram}: {prob}")
Output:
Trigram Probabilities:
('the', 'quick', 'brown'): 0.05
('quick', 'brown', 'fox'): 0.05
('brown', 'fox', 'jumps'): 0.05
('fox', 'jumps', 'over'): 0.1
('jumps', 'over', 'the'): 0.1
('over', 'the', 'lazy'): 0.1
('the', 'lazy', 'dog'): 0.1
('the', 'quick', 'blue'): 0.05
('quick', 'blue', 'fox'): 0.05
('blue', 'fox', 'jumps'): 0.05
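As a sanity check on these probabilities: the first two documents contain 9 words each (7 trigrams each) and the third contains 8 words (6 trigrams), giving 7 + 7 + 6 = 20 trigrams in total. A trigram seen once therefore has probability 1/20 = 0.05, and one seen twice, such as ('fox', 'jumps', 'over'), has 2/20 = 0.1.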
4: Creating a Word Embedding Matrix
Task:
1. Write a function create_embedding_matrix(corpus, embedding_dim) that builds a vocabulary from the corpus, initializes a random embedding matrix E of shape (V, embedding_dim), and returns E, the vocabulary, and a get_word_vector helper.
2. Test the function and get_word_vector with the given corpus and embedding_dim=3.
Code:
import numpy as np

def create_embedding_matrix(corpus, embedding_dim):
    # Build the vocabulary: each unique word gets the next index
    vocabulary = {}
    for sentence in corpus:
        for word in sentence.lower().split():
            if word not in vocabulary:
                vocabulary[word] = len(vocabulary)
    V = len(vocabulary)
    # Initialize embedding matrix with random values between 0 and 1
    E = np.random.rand(V, embedding_dim)
    # Look up a word's embedding; unknown words map to a zero vector
    def get_word_vector(word):
        idx = vocabulary.get(word.lower())
        return E[idx] if idx is not None else np.zeros(embedding_dim)
    return E, vocabulary, get_word_vector
# Example usage:
corpus = [
"I love machine learning",
"Machine learning is amazing",
"I love learning new things"
]
embedding_dim = 3
E, vocabulary, get_word_vector = create_embedding_matrix(corpus, embedding_dim)
print("Vocabulary:", vocabulary)
print("Embedding Matrix E:\n", E)
# Test get_word_vector
word = "learning"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
Embedding Matrix E:
[[0.70366694 0.37323165 0.8339942 ]
[0.30824863 0.25459773 0.29978671]
[0.17141767 0.55727104 0.19208332]
[0.36011277 0.62322428 0.86099527]
[0.1327652 0.03365305 0.35291037]
[0.80062233 0.84881622 0.73158583]
[0.70957902 0.75419446 0.53513209]
[0.78353907 0.28600711 0.20810742]]
Embedding for 'learning': [0.36011277 0.62322428 0.86099527]
Embedding for 'unknown': [0. 0. 0.]
5: Creating a Word Embedding Matrix with Pre-trained Embeddings
Task:
1. Write a function that builds a vocabulary from the corpus and creates an embedding matrix E, using the pre-trained embedding for a word when one is available and random initialization otherwise, and that returns E, the vocabulary, and a get_word_vector helper.
2. Test the function with the given corpus and pre-trained embeddings.
Code:
import numpy as np

def create_embedding_matrix(corpus, pretrained_embeddings, embedding_dim):
    # Build the vocabulary: each unique word gets the next index
    vocabulary = {}
    for sentence in corpus:
        for word in sentence.lower().split():
            if word not in vocabulary:
                vocabulary[word] = len(vocabulary)
    V = len(vocabulary)
    # Initialize embedding matrix
    E = np.zeros((V, embedding_dim))
    # Assign embeddings: pre-trained vector if available, otherwise random initialization
    for word, idx in vocabulary.items():
        if word in pretrained_embeddings:
            E[idx] = np.array(pretrained_embeddings[word])
        else:
            E[idx] = np.random.rand(embedding_dim)
    # Look up a word's embedding; unknown words map to a zero vector
    def get_word_vector(word):
        idx = vocabulary.get(word.lower())
        return E[idx] if idx is not None else np.zeros(embedding_dim)
    return E, vocabulary, get_word_vector
# Example usage:
corpus = [
"I love machine learning",
"Machine learning is amazing",
"I love learning new things"
]
pretrained_embeddings = {
"machine": [0.1, 0.2, 0.3],
"learning": [0.2, 0.3, 0.4],
"amazing": [0.3, 0.4, 0.5],
"love": [0.4, 0.5, 0.6]
}
embedding_dim = 3
E, vocabulary, get_word_vector = create_embedding_matrix(corpus, pretrained_embeddings, embedding_dim)
print("Vocabulary:", vocabulary)
print("Embedding Matrix E:\n", E)
# Test get_word_vector
word = "machine"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector)
word = "i"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector) # Randomly initialized
word = "unknown"
vector = get_word_vector(word)
print(f"Embedding for '{word}':", vector) # Returns zeros
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
Embedding Matrix E:
[[0.20433889 0.45932819 0.62836074]
[0.4 0.5 0.6 ]
[0.1 0.2 0.3 ]
[0.2 0.3 0.4 ]
[0.84748087 0.37440758 0.93981111]
[0.3 0.4 0.5 ]
[0.40474447 0.82834371 0.13308173]
[0.44601989 0.85308688 0.05198728]]
Embedding for 'machine': [0.1 0.2 0.3]
Embedding for 'i': [0.20433889 0.45932819 0.62836074]
Embedding for 'unknown': [0. 0. 0.]
6: Generating One-Hot Encodings
Task: Write a Python function to generate a one-hot encoding for every word in a given corpus using only NumPy.
Code:
import numpy as np
def create_one_hot_encodings(corpus):
    # Build the vocabulary: each unique word gets the next index
    vocabulary = {}
    index = 0
    for sentence in corpus:
        words = sentence.lower().split()
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = index
                index += 1
    V = len(vocabulary)
    # Build a one-hot vector of length V for each word in the vocabulary
    one_hot_encodings = {}
    for word, idx in vocabulary.items():
        vector = np.zeros(V)
        vector[idx] = 1.0
        one_hot_encodings[word] = vector
    return vocabulary, one_hot_encodings
# Example usage:
corpus = [
"I love machine learning",
"Machine learning is amazing",
"I love learning new things"
]
vocabulary, one_hot_encodings = create_one_hot_encodings(corpus)
print("Vocabulary:", vocabulary)
print("\nOne-Hot Encodings:")
for word, one_hot_vector in one_hot_encodings.items():
    print(f"Word: '{word}' - One-Hot Vector: {one_hot_vector}")
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
One-Hot Encodings:
Word: 'i' - One-Hot Vector: [1. 0. 0. 0. 0. 0. 0. 0.]
Word: 'love' - One-Hot Vector: [0. 1. 0. 0. 0. 0. 0. 0.]
Word: 'machine' - One-Hot Vector: [0. 0. 1. 0. 0. 0. 0. 0.]
Word: 'learning' - One-Hot Vector: [0. 0. 0. 1. 0. 0. 0. 0.]
Word: 'is' - One-Hot Vector: [0. 0. 0. 0. 1. 0. 0. 0.]
Word: 'amazing' - One-Hot Vector: [0. 0. 0. 0. 0. 1. 0. 0.]
Word: 'new' - One-Hot Vector: [0. 0. 0. 0. 0. 0. 1. 0.]
Word: 'things' - One-Hot Vector: [0. 0. 0. 0. 0. 0. 0. 1.]
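Since each word's vector has a single 1 at its vocabulary index, the stacked one-hot vectors are simply the rows of a V x V identity matrix. An equivalent shortcut (a sketch reusing the vocabulary built above, not part of the original listing):
# One-hot vectors as rows of the identity matrix (same result as the loop above)
identity = np.eye(len(vocabulary))
one_hot_encodings = {word: identity[idx] for word, idx in vocabulary.items()}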
7: Generating Skip-Gram Training Pairs
Task: Write a Python function to generate skip-gram (target, context) training pairs from the given sentences for a given window size.
Code:
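The pair generator itself is not included in this listing; below is a minimal sketch, assuming a helper named generate_skip_gram_pairs(sentences, window_size) (the name and the choice of returning word pairs rather than index pairs are assumptions) that the example usage afterwards calls:
def generate_skip_gram_pairs(sentences, window_size):
    # Build the vocabulary: each unique word gets the next index
    vocabulary = {}
    tokenized = []
    for sentence in sentences:
        words = sentence.lower().split()
        tokenized.append(words)
        for word in words:
            if word not in vocabulary:
                vocabulary[word] = len(vocabulary)
    # Pair each target word with every context word inside the window
    training_pairs = []
    for words in tokenized:
        for i, target in enumerate(words):
            start = max(0, i - window_size)
            end = min(len(words), i + window_size + 1)
            for j in range(start, end):
                if j != i:
                    training_pairs.append((target, words[j]))
    return vocabulary, training_pairs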
# Example usage:
sentences = [
"I love machine learning",
"Machine learning is amazing",
"I love learning new things"
]
window_size = 2
vocabulary, training_pairs = generate_skip_gram_pairs(sentences, window_size)
print("Vocabulary:", vocabulary)
print("\nSkip-Gram Training Pairs:")
for pair in training_pairs:
    print(pair)
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
Task:
Code:
# Example usage:
sentences = [
"I love machine learning",
"Machine learning is amazing",
"I love learning new things"
]
window_size = 2
Output:
Vocabulary: {'i': 0, 'love': 1, 'machine': 2, 'learning': 3, 'is': 4, 'amazing': 5, 'new': 6, 'things': 7}
9: Implementing the Forward Pass of a Simple RNN
Task:
1. Implement the function rnn_forward(x, Wxh, Whh, Why, bh, by, h0).
2. Test the function with random weights, biases, and an initial hidden state.
Code:
import numpy as np
# Example usage:
# Input sequence
x = [1, 2, 3]
# Hyperparameters
input_size = 1 # Since x is a sequence of numbers
hidden_size = 4 # You can choose any size for hidden state
output_size = 1 # Output is a single number at each time step
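# The forward pass itself is omitted above; a minimal sketch, assuming a vanilla
# tanh RNN with the signature rnn_forward(x, Wxh, Whh, Why, bh, by, h0) named in
# the task (the 0.01 weight scale and column-vector shapes are assumptions):
def rnn_forward(x, Wxh, Whh, Why, bh, by, h0):
    h = h0
    outputs = []
    for x_t in x:
        x_t = np.array([[x_t]])                              # Shape: (input_size, 1)
        h = np.tanh(np.dot(Wxh, x_t) + np.dot(Whh, h) + bh)  # Hidden state update
        y = np.dot(Why, h) + by                              # Output at this time step
        outputs.append(y.flatten())
    return outputs, h
# Random weights, biases, and initial hidden state
np.random.seed(0)
Wxh = np.random.randn(hidden_size, input_size) * 0.01
Whh = np.random.randn(hidden_size, hidden_size) * 0.01
Why = np.random.randn(output_size, hidden_size) * 0.01
bh = np.zeros((hidden_size, 1))
by = np.zeros((output_size, 1))
h0 = np.zeros((hidden_size, 1))
outputs, h_final = rnn_forward(x, Wxh, Whh, Why, bh, by, h0)
print("Outputs at each time step:")
for t, y in enumerate(outputs, start=1):
    print(f"Time step {t}: y = {y}")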
Output:
Outputs at each time step:
Time step 1: y = [-0.00050584]
Time step 2: y = [-0.00101643]
Time step 3: y = [-0.00152624]
10: Implementation of the Self-Attention Mechanism Using Only NumPy
Code:
import numpy as np

def self_attention(X, Wq, Wk, Wv):
    """
    Compute scaled dot-product self-attention for a set of input vectors.

    Args:
        X: Input matrix of shape (n, d), where n is the number of input vectors,
           and d is the dimension of each vector.
        Wq: Query weight matrix of shape (d, dout).
        Wk: Key weight matrix of shape (d, dout).
        Wv: Value weight matrix of shape (d, dout).
    Returns:
        Output matrix of shape (n, dout).
    """
    # Compute Queries (Q), Keys (K), and Values (V)
    Q = np.dot(X, Wq)  # Shape: (n, dout)
    K = np.dot(X, Wk)  # Shape: (n, dout)
    V = np.dot(X, Wv)  # Shape: (n, dout)
    # Scaled dot-product attention scores
    scores = np.dot(Q, K.T) / np.sqrt(K.shape[1])  # Shape: (n, n)
    # Row-wise softmax to obtain attention weights
    weights = np.exp(scores - scores.max(axis=1, keepdims=True))
    weights /= weights.sum(axis=1, keepdims=True)
    # Attention output: weighted sum of the value vectors
    output = np.dot(weights, V)
    return output
# Example usage:
np.random.seed(0)  # For reproducibility
X = np.random.rand(4, 3)   # Four input vectors of dimension d = 3
Wq = np.random.rand(3, 2)  # Query weights (dout = 2)
Wk = np.random.rand(3, 2)  # Key weights
Wv = np.random.rand(3, 2)  # Value weights
output = self_attention(X, Wq, Wk, Wv)
print("Input Matrix X:\n", X)
print("Self-Attention Output:\n", output)
Output:
Input Matrix X:
[[0.5488135 0.71518937 0.60276338]
[0.54488318 0.4236548 0.64589411]
[0.43758721 0.891773 0.96366276]
[0.38344152 0.79172504 0.52889492]]
Self-Attention Output:
[[0.53569849 1.29450415]
[0.53551973 1.29413435]
[0.53849796 1.29925955]
[0.53131543 1.28657939]]
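As a shape check for this example: X is (4, 3) and each weight matrix is (3, 2), so Q, K, and V are (4, 2); the attention-weight matrix softmax(Q K^T / sqrt(dout)) is (4, 4); and the final output, a weighted sum of the value rows, is (4, 2), matching the printed result.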