0% found this document useful (0 votes)

12 views31 pages

GenAI - Lab-File - Darab Khan 22SCSE1480055

The document outlines a lab file for a course on Generative and Explainable AI, detailing two projects: a CNN-based classification of COVID and non-COVID images, and a Transformer-based LLM project. It includes code snippets for data preparation, model building, and training, with specific metrics on image counts and model performance. The document serves as a comprehensive guide for implementing the first project using TensorFlow and Keras.

Uploaded by

aman khan

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

12 views31 pages

GenAI - Lab-File - Darab Khan 22SCSE1480055

Uploaded by

aman khan

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 31

LAB FILE

Course Code: R1UD702B

Course Name: Generative and Explainable AI
Semester: 7th
Session: 2024-2025

Submitted to Submitted by
Shiksha Singh Aryan Kumar
22SCSE1180135

1
INDEX

S. No Object Page No Signature

1. Project 1 - CNN Based Classification 3-15

2 Project 2 – Transformer Based LLMs 16-31

2
Project-1
Description: To implement CNN for classification on the dataset of COVID and NON-COVID from Kaggle.
Code:
rom PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [ ]:

from tensorflow.keras.optimizers.legacy import Adam

In [ ]:

import numpy as np # linear algebra

import random
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import cv2
import shutil
from glob import glob
# Helper libraries
import matplotlib.pyplot as plt
import math
%matplotlib inline
print(tf.__version__)

2.17.0

3
In [ ]:

data_root='/content/Data'
path_positive_cases = os.path.join('/content/Data/Covid')
path_negative_cases = os.path.join('/content/Data/NoCovid')

In [ ]:

positive_images_ls = glob(os.path.join(path_positive_cases,"*.png"))

negative_images_ls = glob(os.path.join(path_negative_cases,"*.png"))
negative_images_ls.extend(glob(os.path.join(path_negative_cases,"*.jpg")))

In [ ]:

covid = {'class': 'CT_COVID',

'path': path_positive_cases,
'images': positive_images_ls}

non_covid = {'class': 'CT_NonCOVID',

'path': path_negative_cases,
'images': negative_images_ls}

In [ ]:

total_positive_covid = len(positive_images_ls)
total_negative_covid = len(negative_images_ls)
print("Total Positive Cases Covid19 images: {}".format(total_positive_covid))
print("Total Negative Cases Covid19 images: {}".format(total_negative_covid))

Total Positive Cases Covid19 images: 1252

Total Negative Cases Covid19 images: 1228
In [ ]:

4
image_positive = cv2.imread(os.path.join(positive_images_ls[1]))
image_negative = cv2.imread(os.path.join(negative_images_ls[5]))

f = plt.figure(figsize=(8, 8))
f.add_subplot(1, 2, 1)
plt.imshow(image_negative)
f.add_subplot(1,2, 2)
plt.imshow(image_positive)

Out[ ]:
<matplotlib.image.AxesImage at 0x79568114c250>

In [ ]:

print("Image COVID Shape {}".format(image_positive.shape))

print("Image Non COVID Shape {}".format(image_negative.shape))

Image COVID Shape (326, 370, 3)

Image Non COVID Shape (247, 320, 3)
In [ ]:

# Create Train-Test Directory

subdirs = ['train/', 'test/']
5
for subdir in subdirs:
labeldirs = ['CT_COVID', 'CT_NonCOVID']
for labldir in labeldirs:
newdir = subdir + labldir
os.makedirs(newdir, exist_ok=True)

In [ ]:

# Copy Images to test set

# seed random number generator

random.seed(123)
# define ratio of pictures used for testing
test_ratio = 0.1

for cases in [covid, non_covid]:

total_cases = len(cases['images']) #number of total images
num_to_select = int(test_ratio * total_cases) #number of images to copy to test set

print(cases['class'], num_to_select)

list_of_random_files = random.sample(cases['images'], num_to_select) #random files selected

for files in list_of_random_files:

shutil.copy2(files, 'test/' + cases['class'])

CT_COVID 125
CT_NonCOVID 122
In [ ]:

for cases in [covid, non_covid]:

image_test_files = os.listdir('test/' + cases['class']) # list test files

6
for images in cases['images']:
if images.split('/')[-1] not in (image_test_files): #exclude test files from shutil.copy
shutil.copy2(images, 'train/' + cases['class'])

In [ ]:

total_train_covid = len(os.listdir('/content/train/CT_COVID'))
total_train_noncovid = len(os.listdir('/content/train/CT_NonCOVID'))
total_test_covid = len(os.listdir('/content/test/CT_COVID'))
total_test_noncovid = len(os.listdir('/content/test/CT_NonCOVID'))

print("Train sets images COVID: {}".format(total_train_covid))

print("Train sets images Non COVID: {}".format(total_train_noncovid))
print("Test sets images COVID: {}".format(total_test_covid))
print("Test sets images Non COVID: {}".format(total_test_noncovid))

Train sets images COVID: 1127

Train sets images Non COVID: 1106
Test sets images COVID: 125
Test sets images Non COVID: 122
In [ ]:

batch_size = 128
epochs = 15
IMG_HEIGHT = 150
IMG_WIDTH = 150

In [ ]:

train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data

test_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data

In [ ]:

7
train_dir = os.path.join('/content/train')
test_dir = os.path.join('/content/test')

total_train = total_train_covid + total_train_noncovid

total_test = total_test_covid + total_test_noncovid

In [ ]:

test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
directory=test_dir,
target_size=(IMG_HEIGHT, IMG_WIDTH),
class_mode='binary')

Found 247 images belonging to 2 classes.

In [ ]:

train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
directory=train_dir,
shuffle=True,
target_size=(IMG_HEIGHT, IMG_WIDTH),
class_mode='binary')

Found 2233 images belonging to 2 classes.

In [ ]:

model = Sequential([
Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)),

8
MaxPooling2D(),
Conv2D(32, 3, padding='same', activation='relu'),
MaxPooling2D(),
Conv2D(64, 3, padding='same', activation='relu'),
MaxPooling2D(),
Flatten(),
Dense(512, activation='relu'),
Dense(1)
])

/usr/local/lib/python3.10/dist-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do
not pass an ìnput_shape`/ìnput_dim` argument to a layer. When using Sequential models, prefer using an
Ìnput(shape)` object as the first layer in the model instead.
super().__init__(activity_regularizer=activity_regularizer, **kwargs)
In [ ]:

model.compile(optimizer='adam',
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])

In [ ]:

model.summary()

Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━
━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓

┃ Layer (type) ┃ Output Shape ┃ Param # ┃

┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━
━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D) │ (None, 150, 150, 16) │ 448 │
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ max_pooling2d (MaxPooling2D) │ (None, 75, 75, 16) │ 0│

9
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ conv2d_1 (Conv2D) │ (None, 75, 75, 32) │ 4,640 │
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ max_pooling2d_1 (MaxPooling2D) │ (None, 37, 37, 32) │ 0│
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ conv2d_2 (Conv2D) │ (None, 37, 37, 64) │ 18,496 │
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ max_pooling2d_2 (MaxPooling2D) │ (None, 18, 18, 64) │ 0│
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ flatten (Flatten) │ (None, 20736) │ 0│
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ dense (Dense) │ (None, 512) │ 10,617,344 │
├──────────────────────────────────────┼──────────────────────
───────┼─────────────────┤
│ dense_1 (Dense) │ (None, 1) │ 513 │
└──────────────────────────────────────┴──────────────────────
───────┴─────────────────┘
Total params: 10,641,441 (40.59 MB)
Trainable params: 10,641,441 (40.59 MB)
Non-trainable params: 0 (0.00 B)
In [ ]:

!pip install keras.preprocessing

Collecting keras.preprocessing
Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from
keras.preprocessing) (1.26.4)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from
keras.preprocessing) (1.16.0)

10
Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
42.6/42.6 kB 2.7 MB/s eta 0:00:00
Installing collected packages: keras.preprocessing
Successfully installed keras.preprocessing-1.1.2
In [ ]:

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator # Use the correct import statement
for ImageDataGenerator

def data_generator(generator):
try:
for data_batch, labels_batch in generator:
yield data_batch, labels_batch
except Exception as e:
print(f"Error loading image: {e}")

history = model.fit(
data_generator(train_data_gen), # Wrap the generator with error handling
steps_per_epoch=total_train // batch_size,
epochs=epochs,
validation_data=data_generator(test_data_gen), # Wrap the validation generator as well
validation_steps=total_test // batch_size
)

Epoch 1/15
8/17 ━━━━━━━━━━━━━━━━━━━━ 9s 1s/step - accuracy: 0.4848 - loss: 2.5313 Error
loading image: Truncated File Read

11
/usr/lib/python3.10/contextlib.py:153: UserWarning: Your input ran out of data; interrupting training. Make
sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to
use the `.repeat()` function when building your dataset.
self.gen.throw(typ, value, traceback)
17/17 ━━━━━━━━━━━━━━━━━━━━ 32s 721ms/step - accuracy: 0.4898 - loss: 2.1836 -
val_accuracy: 0.4922 - val_loss: 0.6919
Epoch 2/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 16s 913ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5210 - val_loss: 0.6932
Epoch 3/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5391 - val_loss: 0.6946
Epoch 4/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.4706 - val_loss: 0.6903
Epoch 5/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 56ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5000 - val_loss: 0.6921
Epoch 6/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5126 - val_loss: 0.6929
Epoch 7/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 54ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5469 - val_loss: 0.6952
Epoch 8/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.4622 - val_loss: 0.6896
Epoch 9/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 53ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5234 - val_loss: 0.6937
Epoch 10/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.4874 - val_loss: 0.6913
Epoch 11/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 56ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5078 - val_loss: 0.6926

12
Epoch 12/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5042 - val_loss: 0.6924
Epoch 13/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 56ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.4609 - val_loss: 0.6895
Epoch 14/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.5546 - val_loss: 0.6958
Epoch 15/15
17/17 ━━━━━━━━━━━━━━━━━━━━ 1s 57ms/step - accuracy: 0.0000e+00 - loss:
0.0000e+00 - val_accuracy: 0.4844 - val_loss: 0.6911
In [ ]:

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss=history.history['loss']
val_loss=history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

13
plt.show()

14
Project-1
Description: To implement Transformers for LLMs.
Code:

In [1]:

!pip install numpy requests torch tiktoken matplotlib pandas

Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.26.4)

Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (2.32.3)
Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.4.1+cu121)
Collecting tiktoken
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
(6.6 kB)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.2.2)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from
requests) (3.4.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests)
(2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests)
(2024.8.30)
Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)
Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch)
(4.12.2)
Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.3)
Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.1)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)
Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.6.1)
Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken)
(2024.9.11)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(1.3.0)

15
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(4.54.1)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(1.4.7)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(24.1)
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib)
(3.2.0)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from
matplotlib) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas)
(2024.2)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7-
>matplotlib) (1.16.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2-
>torch) (3.0.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy-
>torch) (1.3.0)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2
MB 14.5 MB/s eta 0:00:00
Installing collected packages: tiktoken
Successfully installed tiktoken-0.8.0
In [2]:

import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
import math
import tiktoken
import torch

16
import torch.nn as nn

In [3]:

# Hyperparameters
batch_size = 4 # How many batches per training step
context_length = 16 # Length of the token chunk each batch
d_model = 64 # The vector size of the token embeddings
num_layers = 8 # Number of transformer blocks
num_heads = 4 # Number of heads in Multi-head attention
learning_rate = 1e-3 # 0.001
dropout = 0.1 # Dropout rate
max_iters = 5000 # Total of training iterations
eval_interval = 50 # How often to evaluate the model
eval_iters = 20 # How many iterations to average the loss over when evaluating the model
device = 'cuda' if torch.cuda.is_available() else 'cpu' # Instead of using the cpu, we'll use the GPU if it's
available.

TORCH_SEED = 1337
torch.manual_seed(TORCH_SEED)

Out[3]:
<torch._C.Generator at 0x7a8d42e65ad0>
In [4]:

# download a sample txt file from https://huggingface.co/datasets/goendalf666/sales-

textbook_for_convincing_and_selling/raw/main/sales_textbook.txt
if not os.path.exists('sales_textbook.txt'):
url = 'https://huggingface.co/datasets/goendalf666/sales-
textbook_for_convincing_and_selling/raw/main/sales_textbook.txt'
with open('sales_textbook.txt', 'w') as f:
f.write(requests.get(url).text)

with open('sales_textbook.txt', 'r', encoding='utf-8') as f:

17
text = f.read()

In [5]:

# Using TikToken to tokenize the source text

encoding = tiktoken.get_encoding("cl100k_base")
tokenized_text = encoding.encode(text) # size of tokenized source text is 77,919
vocab_size = len(set(tokenized_text)) # size of vocabulary is 3,771
max_token_value = max(tokenized_text)

print(f"Tokenized text size: {len(tokenized_text)}")

print(f"Vocabulary size: {vocab_size}")
print(f"The maximum value in the tokenized text is: {max_token_value}")

Tokenized text size: 77919

Vocabulary size: 3771
The maximum value in the tokenized text is: 100069
In [7]:

# Prepare data for training batch

data = train_data
idxs = torch.randint(low=0, high=len(data) - context_length, size=(batch_size,))
# Convert list slices to tensors before stacking
x_batch = torch.stack([torch.tensor(data[idx:idx + context_length]) for idx in idxs])
y_batch = torch.stack([torch.tensor(data[idx + 1:idx + context_length + 1]) for idx in idxs])
print(x_batch.shape, x_batch.shape)

torch.Size([4, 16]) torch.Size([4, 16])

In [8]:

# Define Token Embedding look-up table

token_embedding_lookup_table = nn.Embedding(max_token_value, d_model)

18
# Get X and Y embedding
x = token_embedding_lookup_table(x_batch.data)
y = token_embedding_lookup_table(y_batch.data)

In [9]:

# Get x and y embedding

x = token_embedding_lookup_table(x_batch.data) # [4, 16, 64] [batch_size, context_length, d_model]
y = token_embedding_lookup_table(y_batch.data)

In [10]:

# Define Position Encoding look-up table

position_encoding_lookup_table = torch.zeros(context_length, d_model) # initial with zeros with shape
(context_length, d_model)
position = torch.arange(0, context_length, dtype=torch.float).unsqueeze(1)
# apply the sine & cosine
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
position_encoding_lookup_table[:, 0::2] = torch.sin(position * div_term)
position_encoding_lookup_table[:, 1::2] = torch.cos(position * div_term)
position_encoding_lookup_table = position_encoding_lookup_table.unsqueeze(0).expand(batch_size, -1, -1)
#add batch to the first dimension

print("Position Encoding Look-up Table: ", position_encoding_lookup_table.shape)

Position Encoding Look-up Table: torch.Size([4, 16, 64])

In [11]:

# Add positional encoding into the input embedding vector

input_embedding_x = x + position_encoding_lookup_table # [4, 16, 64] [batch_size, context_length, d_model]
input_embedding_y = y + position_encoding_lookup_table

X = input_embedding_x

19
x_plot = input_embedding_x[0].detach().cpu().numpy()
print("Final Input Embedding of x: \n", pd.DataFrame(x_plot))

Final Input Embedding of x:

0 1 2 3 4 5 6 \
0 -0.299130 0.949924 -0.631772 0.939232 -2.082235 1.398602 -0.240277
1 0.758386 0.999339 0.238804 2.130460 -0.736705 1.686566 0.824980
2 -0.922071 -2.250749 -0.556319 1.222940 2.109022 -0.400113 -0.138789
3 0.306074 -1.573041 2.231532 0.032769 1.850678 -0.120396 -0.011787
4 -0.954794 -0.976841 0.139319 -0.865764 -0.193850 -1.655313 1.521742
5 0.679198 0.506228 0.353623 -0.863025 -2.570290 -1.323900 0.371723
6 -0.323705 2.691280 -2.467007 0.717207 1.387555 -2.344249 0.049187
7 0.476250 1.532691 -0.440719 0.884680 -0.049779 -0.415280 -0.095141
8 -1.377338 -0.241152 0.619716 0.535313 -2.747910 -0.546994 -1.181236
9 -0.405825 0.559437 1.838797 0.457612 -2.470442 1.303259 -0.322104
10 -0.724732 -1.171013 -0.256460 -1.604869 -1.592075 0.130025 0.570488
11 -0.449413 -0.703517 0.179154 0.373091 -0.182106 1.931234 -0.799360
12 0.341896 1.383503 -0.627911 -1.276247 1.170495 1.140588 -1.435867
13 0.524299 0.225087 -1.034895 0.773311 2.538593 0.801516 -2.413383
14 1.239190 -0.703227 -0.857716 -0.738189 2.185374 1.034195 0.174827
15 2.108387 -0.318655 -0.287144 0.649548 0.771553 -0.321154 1.457626

7 8 9 ... 54 55 56 57 \
0 1.460907 0.438821 0.568639 ... 1.452847 1.467532 1.984371 0.791459
1 0.610698 -0.504885 1.616139 ... 0.448665 -2.071404 0.672218 2.796822
2 -1.933866 -1.671411 1.150713 ... -0.686295 -0.096573 -2.178207 0.451573
3 -0.635150 0.029269 1.307228 ... -0.607196 2.235353 -0.330132 0.043395
4 -0.318029 -0.214993 0.665824 ... -0.545532 -0.345191 0.142483 1.507527
5 -1.104829 0.616293 0.783596 ... 1.080138 -0.161667 -1.531240 1.557224
6 0.084035 1.446481 0.074499 ... 0.716758 1.740306 -1.434396 0.254191
7 -1.715202 -0.558720 -1.187362 ... 0.746224 0.008394 -0.101539 -0.490031
8 -3.191279 -0.665482 -1.169012 ... -0.079880 3.122454 -2.058005 1.065758

20
9 0.723305 -1.070696 -1.178378 ... -0.280317 1.782857 -1.054796 1.835647
10 -1.642119 0.801727 -1.764521 ... -0.340848 3.639359 -0.644349 3.063182
11 0.825862 -0.736930 0.177177 ... -0.280090 1.006504 1.357095 1.059691
12 1.224846 -0.545266 -1.121737 ... -1.307938 2.227912 -1.178179 1.575556
13 -1.562514 0.118980 0.325631 ... 0.397413 0.479963 0.217271 2.055336
14 1.099753 -1.613777 -1.243054 ... 0.359625 1.258582 1.067245 1.663585
15 0.268369 -0.826254 -0.871297 ... 1.625291 0.643504 -1.032252 1.950458

58 59 60 61 62 63
0 0.456751 0.789545 -2.045897 1.099113 0.166035 3.139895
1 0.437217 2.284778 -1.708361 -0.152342 0.699953 0.343699
2 -0.500310 1.891739 0.073773 0.102590 -0.138560 0.840906
3 1.379010 1.228062 -0.214476 0.090425 -0.173942 -0.830836
4 0.027854 -0.008453 -1.213767 -0.094705 0.886935 1.103025
5 -0.348979 1.005130 -0.339587 0.136550 -0.597760 1.012034
6 -0.240991 0.542596 0.039506 2.375268 0.122078 1.598531
7 -0.253113 1.570766 -0.642553 0.589649 -0.934843 -1.100760
8 0.787592 2.288829 0.152384 1.582828 -0.278190 1.500612
9 -2.389836 2.359888 -0.895393 2.478489 1.150637 2.515316
10 1.745668 1.311434 -0.703046 0.981743 0.178330 0.443150
11 1.175118 1.527424 -0.844068 1.202420 -0.927549 0.908416
12 0.974123 1.321630 -0.581522 -1.142979 -0.997342 2.858498
13 0.841140 0.250887 1.338835 1.003394 1.427358 1.072308
14 0.892081 1.761903 0.728198 1.667248 -1.340948 0.224300
15 0.948557 2.855112 -0.750874 2.738219 0.865055 2.720319

[16 rows x 64 columns]

In [12]:

# Prepare Query, Key, Value for Multi-head Attention

query = key = value = X # [4, 16, 64] [batch_size, context_length, d_model]

21
# Define Query, Key, Value weight matrices
Wq = nn.Linear(d_model, d_model)
Wk = nn.Linear(d_model, d_model)
Wv = nn.Linear(d_model, d_model)

Q = Wq(query) #[4, 16, 64]

Q = Q.view(batch_size, -1, num_heads, d_model // num_heads) #[4, 16, 4, 16]

K = Wk(key) #[4, 16, 64]

K = K.view(batch_size, -1, num_heads, d_model // num_heads) #[4, 16, 4, 16]

V = Wv(value) #[4, 16, 64]

V = V.view(batch_size, -1, num_heads, d_model // num_heads) #[4, 16, 4, 16]

In [13]:

# Transpose q,k,v from [batch_size, context_length, num_heads, head_size] to [batch_size, num_heads,

context_length, head_size]
# The reason is that treat each batch with "num_heads" as its first dimension.
Q = Q.transpose(1, 2) # [4, 4, 16, 16]
K = K.transpose(1, 2) # [4, 4, 16, 16]
V = V.transpose(1, 2) # [4, 4, 16, 16]

In [15]:

attention_score = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(d_model // num_heads) # [4, 4, 16, 16] #[4,
4, 16, 16] [batch_size, num_heads, context_length, context_length]
print(pd.DataFrame(attention_score[0][0].detach().cpu().numpy()))

0 1 2 3 4 5 6 \
0 0.021498 0.759700 1.114897 0.861203 0.996177 0.087225 0.020397
1 -0.186354 0.423521 0.930679 0.397063 0.689583 -0.023029 -1.295206

22
2 -0.116987 -0.521399 -0.102028 -0.048670 -0.094187 0.211903 0.785083
3 0.677378 0.094782 0.187630 1.087047 0.387743 0.641712 1.120173
4 0.631352 -0.280073 -0.932875 -0.581271 -0.206257 0.127698 -0.102794
5 -0.327146 0.068143 0.148191 -0.391794 0.132223 0.017728 0.268415
6 -0.444339 0.258554 0.455449 -0.077464 0.134794 -0.053327 0.101352
7 -0.245799 0.580802 0.932249 0.135226 0.357541 -0.006203 0.136920
8 -0.303093 0.018890 0.015307 0.294602 0.369652 -0.219553 -0.192504
9 -0.136668 0.443592 0.470723 -0.184153 0.221327 -0.058326 -0.335395
10 0.694354 0.946938 0.422884 0.789917 0.990797 -0.172340 0.704143
11 0.352159 0.977470 1.040248 1.317057 1.359986 0.120361 0.799884
12 -0.238808 0.104332 0.052890 -0.593303 -0.794684 -0.462218 0.311895
13 -0.517919 0.067904 0.636224 0.144879 0.018605 0.213238 0.280446
14 -0.085574 -0.059928 0.476232 -0.008881 -0.143431 0.318674 0.256556
15 -0.184219 -0.247370 0.608031 0.304992 0.352951 0.453829 0.285533

7 8 9 10 11 12 13 \
0 0.353933 -0.263620 -0.481791 -1.015287 -0.027799 -0.770300 1.028327
1 0.035366 -0.226877 -1.094293 -0.077335 0.149487 -1.291372 -0.246792
2 0.184757 -0.163499 0.059236 0.515760 0.117891 0.133373 0.033434
3 0.259390 -0.339914 0.120134 -0.394649 -0.246437 -0.579486 0.994677
4 -0.442630 0.287699 -0.108314 0.540231 0.316796 -0.293156 -0.427342
5 0.222270 0.331641 -0.015495 -0.366638 0.014746 -0.224710 0.378336
6 0.162352 0.333664 0.069486 -0.037877 -0.015095 -0.301473 0.237264
7 0.361665 0.171941 -0.240028 -0.460423 -0.134655 0.122417 0.401689
8 -0.606256 -0.548382 -0.053605 -0.832274 -0.339201 -0.297512 0.289678
9 0.530910 0.210424 0.113577 -0.280486 0.202264 0.089873 0.381280
10 -0.240142 -0.371990 -0.299596 -0.832197 -0.359611 -0.637496 0.765209
11 0.680050 0.324915 -0.482536 -0.499427 0.151246 -0.254691 1.035710
12 0.081620 -0.338989 -0.082385 -0.711630 -0.425134 0.018855 0.009447
13 0.551979 0.049975 -0.170539 0.008342 0.121066 0.532295 0.402487
14 0.872133 0.463599 0.522785 0.704850 0.512636 0.430275 0.164187
15 0.616212 0.059369 0.419577 0.663161 0.387546 0.333471 0.249560

23
14 15
0 0.241509 0.527406
1 -0.591006 0.052637
2 0.032123 0.521154
3 -0.021960 0.356922
4 -0.393042 -0.474839
5 0.081683 0.197241
6 0.111261 0.401928
7 0.278085 0.003031
8 0.017494 0.183737
9 0.191927 0.236189
10 0.152228 0.264614
11 0.668065 0.268745
12 -0.214531 -0.060118
13 0.094902 0.129446
14 0.546852 0.545444
15 0.546221 0.853163
In [16]:

# Apply Mask to attention scores

attention_score = attention_score.masked_fill(torch.triu(torch.ones(attention_score.shape[-2:]),
diagonal=1).bool(), float('-inf')) #[4, 4, 16, 16] [batch_size, num_heads, context_length, context_length]
print(pd.DataFrame(attention_score[0][0].detach().cpu().numpy()))

0 1 2 3 4 5 6 \
0 0.021498 -inf -inf -inf -inf -inf -inf
1 -0.186354 0.423521 -inf -inf -inf -inf -inf
2 -0.116987 -0.521399 -0.102028 -inf -inf -inf -inf
3 0.677378 0.094782 0.187630 1.087047 -inf -inf -inf
4 0.631352 -0.280073 -0.932875 -0.581271 -0.206257 -inf -inf
5 -0.327146 0.068143 0.148191 -0.391794 0.132223 0.017728 -inf
6 -0.444339 0.258554 0.455449 -0.077464 0.134794 -0.053327 0.101352
7 -0.245799 0.580802 0.932249 0.135226 0.357541 -0.006203 0.136920

24
8 -0.303093 0.018890 0.015307 0.294602 0.369652 -0.219553 -0.192504
9 -0.136668 0.443592 0.470723 -0.184153 0.221327 -0.058326 -0.335395
10 0.694354 0.946938 0.422884 0.789917 0.990797 -0.172340 0.704143
11 0.352159 0.977470 1.040248 1.317057 1.359986 0.120361 0.799884
12 -0.238808 0.104332 0.052890 -0.593303 -0.794684 -0.462218 0.311895
13 -0.517919 0.067904 0.636224 0.144879 0.018605 0.213238 0.280446
14 -0.085574 -0.059928 0.476232 -0.008881 -0.143431 0.318674 0.256556
15 -0.184219 -0.247370 0.608031 0.304992 0.352951 0.453829 0.285533

7 8 9 10 11 12 13 \
0 -inf -inf -inf -inf -inf -inf -inf
1 -inf -inf -inf -inf -inf -inf -inf
2 -inf -inf -inf -inf -inf -inf -inf
3 -inf -inf -inf -inf -inf -inf -inf
4 -inf -inf -inf -inf -inf -inf -inf
5 -inf -inf -inf -inf -inf -inf -inf
6 -inf -inf -inf -inf -inf -inf -inf
7 0.361665 -inf -inf -inf -inf -inf -inf
8 -0.606256 -0.548382 -inf -inf -inf -inf -inf
9 0.530910 0.210424 0.113577 -inf -inf -inf -inf
10 -0.240142 -0.371990 -0.299596 -0.832197 -inf -inf -inf
11 0.680050 0.324915 -0.482536 -0.499427 0.151246 -inf -inf
12 0.081620 -0.338989 -0.082385 -0.711630 -0.425134 0.018855 -inf
13 0.551979 0.049975 -0.170539 0.008342 0.121066 0.532295 0.402487
14 0.872133 0.463599 0.522785 0.704850 0.512636 0.430275 0.164187
15 0.616212 0.059369 0.419577 0.663161 0.387546 0.333471 0.249560

14 15
0 -inf -inf
1 -inf -inf
2 -inf -inf
3 -inf -inf
4 -inf -inf

25
5 -inf -inf
6 -inf -inf
7 -inf -inf
8 -inf -inf
9 -inf -inf
10 -inf -inf
11 -inf -inf
12 -inf -inf
13 -inf -inf
14 0.546852 -inf
15 0.546221 0.853163
In [18]:

import pandas as pd
import torch
import math

# Assuming attention_score is your 4D tensor

# Select the first batch and the first head

batch_index = 0
head_index = 0
attention_score_2d = attention_score[batch_index, head_index].detach().cpu().numpy()

# Create the DataFrame

df = pd.DataFrame(attention_score_2d)
print(df)

0 1 2 3 4 5 6 \
0 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1 0.352088 0.647912 0.000000 0.000000 0.000000 0.000000 0.000000
2 0.372795 0.248792 0.378413 0.000000 0.000000 0.000000 0.000000
3 0.271920 0.151853 0.166628 0.409599 0.000000 0.000000 0.000000

26
4 0.427102 0.171674 0.089371 0.127027 0.184826 0.000000 0.000000
5 0.124600 0.185008 0.200427 0.116800 0.197252 0.175912 0.000000
6 0.083958 0.169561 0.206461 0.121170 0.149823 0.124131 0.144895
7 0.069452 0.158736 0.225584 0.101663 0.126974 0.088256 0.101836
8 0.088831 0.122574 0.122136 0.161488 0.174074 0.096571 0.099218
9 0.073767 0.131784 0.135409 0.070346 0.105520 0.079778 0.060472
10 0.121218 0.156050 0.092400 0.133374 0.163047 0.050953 0.122411
11 0.060166 0.112440 0.119725 0.157907 0.164834 0.047718 0.094145
12 0.072694 0.102453 0.097315 0.050997 0.041695 0.058140 0.126086
13 0.034521 0.062015 0.109476 0.066978 0.059032 0.071716 0.076702
14 0.042108 0.043202 0.073850 0.045464 0.039741 0.063085 0.059286
15 0.035042 0.032898 0.077386 0.057155 0.059963 0.066327 0.056053

7 8 9 10 11 12 13 \
0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
4 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
6 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
7 0.127499 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
8 0.065600 0.069508 0.000000 0.000000 0.000000 0.000000 0.000000
9 0.143809 0.104376 0.094741 0.000000 0.000000 0.000000 0.000000
10 0.047613 0.041731 0.044864 0.026339 0.000000 0.000000 0.000000
11 0.083513 0.058549 0.026113 0.025675 0.049215 0.000000 0.000000
12 0.100152 0.065764 0.085003 0.045306 0.060336 0.094059 0.000000
13 0.100631 0.060913 0.048859 0.058429 0.065401 0.098669 0.086658
14 0.109721 0.072923 0.077370 0.092820 0.076588 0.070533 0.054055
15 0.078021 0.044707 0.064094 0.081772 0.062073 0.058806 0.054073

14 15
0 0.000000 0.000000

27
1 0.000000 0.000000
2 0.000000 0.000000
3 0.000000 0.000000
4 0.000000 0.000000
5 0.000000 0.000000
6 0.000000 0.000000
7 0.000000 0.000000
8 0.000000 0.000000
9 0.000000 0.000000
10 0.000000 0.000000
11 0.000000 0.000000
12 0.000000 0.000000
13 0.000000 0.000000
14 0.079254 0.000000
15 0.072747 0.098883
In [20]:

# Calculate the V attention output

attention_output = torch.matmul(attention_score, V) # [4, 4, 16, 16] [batch_size, num_heads, context_length,
head_size]
print(attention_output.shape) # Now this line should work correctly.

torch.Size([4, 4, 16, 16])

In [21]:

A = A.transpose(1, 2) # [4, 16, 4, 16] [batch_size, context_length, num_heads, head_size]

A = A.reshape(batch_size, -1, d_model) # [4, 16, 64] [batch_size, context_length, d_model]

In [22]:

# Define the output weight matrix

Wo = nn.Linear(d_model, d_model)
output = Wo(A) # [4, 16, 64] [batch_size, context_length, d_model]

28
print(output.shape)

torch.Size([4, 16, 64])

In [23]:

# Add residual connection

output = output + X

# Add Layer Normalization

layer_norm = nn.LayerNorm(d_model)
output = layer_norm(output)

In [24]:

# Define Feed Forward Network

output = nn.Linear(d_model, d_model * 4)(output)
output = nn.ReLU()(output)
output = nn.Linear(d_model * 4, d_model)(output)
output = torch.dropout(output, p=dropout, train=True)

In [25]:

# Add residual connection

output = output + X
# Add Layer Normalization
layer_norm = nn.LayerNorm(d_model)
output = layer_norm(output)

In [26]:

logits = nn.Linear(d_model, max_token_value)(output)

print(pd.DataFrame(logits[0].detach().cpu().numpy()))

29
0 1 2 3 4 5 6 \
0 0.667483 -0.389633 -0.851936 -0.394342 -0.998247 0.262954 0.480094
1 -0.038112 0.322328 -0.031820 -0.584922 -0.112201 -0.738884 0.440067
2 -0.628515 0.205482 -0.180367 -0.370944 0.412005 0.792747 -0.194784
3 -1.048590 0.793600 -0.683289 -0.619398 -0.317157 0.628633 -0.024611
4 -0.716813 -0.497194 -0.200344 -1.548010 -0.219922 0.368706 -0.773210
5 -0.378771 1.055804 -0.537877 0.158270 -0.003828 -0.635562 0.394238
6 -0.310792 -0.171071 0.622006 -0.609642 0.586624 0.604495 0.867905
7 -0.922160 0.500379 -0.034140 0.611128 -0.062309 0.604757 0.336614
8 0.318265 0.177395 0.507006 -0.731018 -0.162960 0.023586 0.724885
9 1.179621 0.450486 -0.439893 0.052057 0.397160 -1.076628 0.009521
10 0.329482 -0.249196 -0.487763 0.146532 0.203778 0.126987 -0.013209
11 0.491254 0.254098 -0.805063 0.847444 -0.868649 -0.204639 0.187366
12 0.908632 -0.443110 0.164215 -0.860427 -0.831706 -0.385907 -0.045611
13 -0.267631 -0.724435 -0.408752 0.460729 -0.677400 0.245174 0.588925
14 -0.226249 0.275237 -0.203830 0.269060 -0.362902 0.613476 0.069186
15 0.445209 0.237637 0.394769 0.191370 0.002953 0.345110 0.509143

7 8 9 ... 100059 100060 100061 100062 \

0 -0.170555 0.321491 -0.220245 ... 0.564134 -0.215222 0.511216 -0.345695
1 -0.064511 -0.477147 0.017552 ... -0.518621 -0.709125 -0.062348 0.385515
2 0.090611 0.116007 -0.432469 ... -0.815883 -0.021172 0.105399 -0.184825
3 -0.684836 0.315627 -0.726044 ... 0.343552 -0.406947 0.113339 0.518061
4 0.131509 1.002828 -0.384169 ... -0.272205 -0.356717 -0.617441 0.507051
5 0.205076 -0.376159 -0.327241 ... 0.802938 -0.170070 -0.385224 0.149915
6 0.189337 -0.016440 -0.140744 ... 1.027234 0.717433 0.220867 0.117609
7 -0.430542 -0.866047 -1.096003 ... -0.119494 0.724068 -0.010441 -0.006981
8 1.001006 -0.591480 -0.908987 ... -0.415700 -0.490938 0.141666 0.299761
9 -0.407299 -0.337472 -0.405212 ... 0.243390 -0.221569 0.333176 -0.685081
10 0.003749 -0.731550 -0.305711 ... 0.174980 -0.875414 0.528700 0.273198
11 -0.176603 0.268296 -1.479937 ... 0.183884 -0.223598 0.664912 0.249806
12 -0.186296 -0.316574 -0.322434 ... -0.309264 -0.024482 0.413519 0.471669

30
13 -0.478098 -0.365851 -1.100504 ... 0.009546 -0.100925 0.359181 -0.584053
14 -0.556882 -0.456754 0.220161 ... 0.158783 0.210422 0.217538 -0.163716
15 -0.298419 -0.693817 0.393190 ... -0.114032 0.024332 0.364460 -0.628126

100063 100064 100065 100066 100067 100068

0 0.280026 -0.349468 -1.221684 0.165527 -0.370290 -0.554029
1 0.757354 0.018531 0.216118 0.658736 0.896402 0.008098
2 0.522658 0.539416 -0.011119 -0.354371 0.286689 0.178755
3 0.095571 0.447914 0.503538 -0.180665 -1.383265 0.025491
4 0.071807 -0.294688 0.486347 -0.558503 0.035770 0.591002
5 -0.481235 0.759102 -0.394108 0.041804 -0.036551 0.057988
6 -0.191414 0.218748 -0.374619 0.706337 -0.422270 0.440738
7 0.553941 0.703121 -0.167306 1.422663 0.062663 -0.328725
8 0.190456 0.520707 0.083594 -0.300857 -1.272671 0.048819
9 -0.171625 0.068226 0.191056 -0.876465 0.260361 0.481487
10 0.839494 0.547030 0.100529 -0.164694 -0.530678 -0.605713
11 -0.750480 0.348481 0.068968 -0.156382 -0.645810 0.348471
12 0.029170 0.117407 0.952073 0.557498 -0.423473 0.781243
13 -0.187492 0.196010 -0.530906 -0.222555 0.607256 0.519245
14 -0.704219 0.359411 0.344767 0.353608 -0.194872 0.841876
15 0.173693 0.330141 0.751014 -0.535763 0.894746 0.147730

[16 rows x 100069 columns]

what we get here is a huge matrix with shape [16, 100069] which is the probabilities of each token in the whole
vocabulary.

Complete Mesocolic Excision and Extent of Lymphadenectomy For The Treatment of Colon Cancer
No ratings yet
Complete Mesocolic Excision and Extent of Lymphadenectomy For The Treatment of Colon Cancer
14 pages
Final Marketing Plan Whole
No ratings yet
Final Marketing Plan Whole
19 pages
Pattern Recognition Lab
No ratings yet
Pattern Recognition Lab
24 pages
Industrial Grinders N V
100% (3)
Industrial Grinders N V
9 pages
Gen AI
No ratings yet
Gen AI
35 pages
A 1
No ratings yet
A 1
9 pages
Assignment 2.3.1 Transfer Learning
No ratings yet
Assignment 2.3.1 Transfer Learning
7 pages
Appix
No ratings yet
Appix
15 pages
Skill4 2100100003
No ratings yet
Skill4 2100100003
5 pages
Assignment 2.1.2 Image Augmentation
No ratings yet
Assignment 2.1.2 Image Augmentation
8 pages
Brain Tumour Classification
No ratings yet
Brain Tumour Classification
10 pages
1729492946538
No ratings yet
1729492946538
10 pages
Convolutional Neural Network
No ratings yet
Convolutional Neural Network
4 pages
Wild Fire CNN Accuracy 95
No ratings yet
Wild Fire CNN Accuracy 95
15 pages
TMA01 Question 2 (55 Marks)
No ratings yet
TMA01 Question 2 (55 Marks)
26 pages
Ex 6
No ratings yet
Ex 6
7 pages
Transfer Learning CNN
No ratings yet
Transfer Learning CNN
21 pages
IBM Deep Learning Peer Review
No ratings yet
IBM Deep Learning Peer Review
43 pages
Neural DEEP
No ratings yet
Neural DEEP
39 pages
Skin Disease Detection Using Transformers
No ratings yet
Skin Disease Detection Using Transformers
35 pages
Malaria 1735833704
No ratings yet
Malaria 1735833704
14 pages
Brain Tumor Multi-Classification With PSO: Import As Import As Import
No ratings yet
Brain Tumor Multi-Classification With PSO: Import As Import As Import
18 pages
TLM For CNN
No ratings yet
TLM For CNN
32 pages
Soc DL Manual
No ratings yet
Soc DL Manual
50 pages
Tomato-Leaf-Disease-Classification - Ipynb - Colaboratory
No ratings yet
Tomato-Leaf-Disease-Classification - Ipynb - Colaboratory
7 pages
Potato Disease Classification Using CNN
No ratings yet
Potato Disease Classification Using CNN
21 pages
Potato
No ratings yet
Potato
16 pages
Skin
No ratings yet
Skin
31 pages
3-Sentiment Analysis BERT
No ratings yet
3-Sentiment Analysis BERT
5 pages
Final Code
No ratings yet
Final Code
16 pages
EXP6
No ratings yet
EXP6
5 pages
COVID-19 Detection CNN Using Chest X-Ray
No ratings yet
COVID-19 Detection CNN Using Chest X-Ray
13 pages
Breat Cancer Detection Using Thermograpgy
No ratings yet
Breat Cancer Detection Using Thermograpgy
15 pages
Brain Tumor Mri Using vgg19 Resnet50 Capstone
No ratings yet
Brain Tumor Mri Using vgg19 Resnet50 Capstone
3 pages
Exp 10 Sentiment Analysis BERT
No ratings yet
Exp 10 Sentiment Analysis BERT
5 pages
C1 W421
No ratings yet
C1 W421
31 pages
ML Lab Session 05 - CNN Implementation
No ratings yet
ML Lab Session 05 - CNN Implementation
4 pages
7 CNNWithCustomImage
No ratings yet
7 CNNWithCustomImage
11 pages
LAB 2 Transfer Learning
No ratings yet
LAB 2 Transfer Learning
10 pages
Improved - FCC - Cat - Dog - Ipynb - Colab
No ratings yet
Improved - FCC - Cat - Dog - Ipynb - Colab
12 pages
Skin Cancer Detection Using Deep Learning Models - Ipynb
No ratings yet
Skin Cancer Detection Using Deep Learning Models - Ipynb
189 pages
LA Lab
No ratings yet
LA Lab
4 pages
Assignment 2.4.1 Multiclass Classification
No ratings yet
Assignment 2.4.1 Multiclass Classification
5 pages
Pinn-Emfnet For Breast Cancer Image Classification: Import As Import As
No ratings yet
Pinn-Emfnet For Breast Cancer Image Classification: Import As Import As
16 pages
Final Question1 With Results
No ratings yet
Final Question1 With Results
21 pages
Cats and Dogs Classification Using CNN
No ratings yet
Cats and Dogs Classification Using CNN
3 pages
Transfer Learning Model
No ratings yet
Transfer Learning Model
4 pages
Experiment 3
No ratings yet
Experiment 3
8 pages
Dcgan
No ratings yet
Dcgan
9 pages
Fatima Binte Aqeel AI Theory Assignment 3
No ratings yet
Fatima Binte Aqeel AI Theory Assignment 3
7 pages
Dlweek 7
No ratings yet
Dlweek 7
9 pages
MLP 40522 97.11%
No ratings yet
MLP 40522 97.11%
12 pages
DL LAB Expt 11 (Add On)
No ratings yet
DL LAB Expt 11 (Add On)
9 pages
Source Code
No ratings yet
Source Code
3 pages
AI Medical Diagnosis Week 01
No ratings yet
AI Medical Diagnosis Week 01
5 pages
Nndlrepo
No ratings yet
Nndlrepo
2 pages
Deep Learning Lab Manual
No ratings yet
Deep Learning Lab Manual
88 pages
AI - Homework - Colab
No ratings yet
AI - Homework - Colab
10 pages
Programing of Ai
No ratings yet
Programing of Ai
18 pages
Cancer Peau
No ratings yet
Cancer Peau
23 pages
Finalised Question 1
No ratings yet
Finalised Question 1
40 pages
Pursue Lesson 1
No ratings yet
Pursue Lesson 1
10 pages
TMH 7 Moving Loads
No ratings yet
TMH 7 Moving Loads
30 pages
Pravin Kolhe,: Executive Engineer
No ratings yet
Pravin Kolhe,: Executive Engineer
21 pages
The Ergonomic Posture Assessment by Comparing REBA With RULA & OWAS: A Case Study in A Gas Springs Factory
No ratings yet
The Ergonomic Posture Assessment by Comparing REBA With RULA & OWAS: A Case Study in A Gas Springs Factory
23 pages
Tritaal/teentaal-Single Speed - (Thah)
No ratings yet
Tritaal/teentaal-Single Speed - (Thah)
7 pages
School Brochure 2024-2025
No ratings yet
School Brochure 2024-2025
2 pages
Module 2.1 Managerial Economics
No ratings yet
Module 2.1 Managerial Economics
18 pages
WHKF DWH Instructions
No ratings yet
WHKF DWH Instructions
11 pages
History of Kenya
No ratings yet
History of Kenya
2 pages
Ge El 10 - Mod 3
No ratings yet
Ge El 10 - Mod 3
62 pages
So Harian N3 TGL 03 Juni 2024
No ratings yet
So Harian N3 TGL 03 Juni 2024
160 pages
PT Mathematics-6 Q2
No ratings yet
PT Mathematics-6 Q2
7 pages
Giancoli Chap 3 Vectors Kinematics in 2 Dimensions
No ratings yet
Giancoli Chap 3 Vectors Kinematics in 2 Dimensions
37 pages
To Investigate The Relationship Between Specific Energy (E) and Depth of Flow (Y) in A Rectangular Channel
67% (3)
To Investigate The Relationship Between Specific Energy (E) and Depth of Flow (Y) in A Rectangular Channel
4 pages
Tony Tella Resume
No ratings yet
Tony Tella Resume
2 pages
STD.7 Comparing Quantities and Algebraic Expressions Practice Worksheet
No ratings yet
STD.7 Comparing Quantities and Algebraic Expressions Practice Worksheet
5 pages
NRC, Logistics Officer, Cover Letter & CV, Elhamfrotan.
No ratings yet
NRC, Logistics Officer, Cover Letter & CV, Elhamfrotan.
4 pages
BARTEC Engineers Manual
No ratings yet
BARTEC Engineers Manual
12 pages
Bourdon Pressure - Gauges PDF
No ratings yet
Bourdon Pressure - Gauges PDF
2 pages
Welding Classification
No ratings yet
Welding Classification
30 pages
Sigachi Industries Limited: Purchase Order
100% (1)
Sigachi Industries Limited: Purchase Order
1 page
Ajmer - RajRAS
No ratings yet
Ajmer - RajRAS
8 pages
Blown Film
0% (1)
Blown Film
4 pages
Ericsson The Bss To Cloud Journey
No ratings yet
Ericsson The Bss To Cloud Journey
26 pages
Approach, Method, and Technique
100% (1)
Approach, Method, and Technique
23 pages
Enterprise Value and EBITDA
No ratings yet
Enterprise Value and EBITDA
3 pages
Ielts Reading Question Sheet
No ratings yet
Ielts Reading Question Sheet
2 pages

GenAI - Lab-File - Darab Khan 22SCSE1480055

Uploaded by

GenAI - Lab-File - Darab Khan 22SCSE1480055

Uploaded by

LAB FILE

Course Code: R1UD702B

S. No Object Page No Signature

2 Project 2 – Transformer Based LLMs 16-31

from tensorflow.keras.optimizers.legacy import Adam

import numpy as np # linear algebra

from tensorflow.keras.models import Sequential

covid = {'class': 'CT_COVID',

non_covid = {'class': 'CT_NonCOVID',

Total Positive Cases Covid19 images: 1252

print("Image COVID Shape {}".format(image_positive.shape))

Image COVID Shape (326, 370, 3)

# Create Train-Test Directory

# Copy Images to test set

# seed random number generator

for cases in [covid, non_covid]:

list_of_random_files = random.sample(cases['images'], num_to_select) #random files selected

for files in list_of_random_files:

for cases in [covid, non_covid]:

print("Train sets images COVID: {}".format(total_train_covid))

Train sets images COVID: 1127

train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data

total_train = total_train_covid + total_train_noncovid

Found 247 images belonging to 2 classes.

Found 2233 images belonging to 2 classes.

┃ Layer (type) ┃ Output Shape ┃ Param # ┃

!pip install keras.preprocessing

!pip install numpy requests torch tiktoken matplotlib pandas

Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.26.4)

# download a sample txt file from https://huggingface.co/datasets/goendalf666/sales-

with open('sales_textbook.txt', 'r', encoding='utf-8') as f:

# Using TikToken to tokenize the source text

print(f"Tokenized text size: {len(tokenized_text)}")

Tokenized text size: 77919

# Prepare data for training batch

torch.Size([4, 16]) torch.Size([4, 16])

# Define Token Embedding look-up table

# Get x and y embedding

# Define Position Encoding look-up table

print("Position Encoding Look-up Table: ", position_encoding_lookup_table.shape)

Position Encoding Look-up Table: torch.Size([4, 16, 64])

# Add positional encoding into the input embedding vector

Final Input Embedding of x:

[16 rows x 64 columns]

# Prepare Query, Key, Value for Multi-head Attention

query = key = value = X # [4, 16, 64] [batch_size, context_length, d_model]

Q = Wq(query) #[4, 16, 64]

K = Wk(key) #[4, 16, 64]

V = Wv(value) #[4, 16, 64]

# Transpose q,k,v from [batch_size, context_length, num_heads, head_size] to [batch_size, num_heads,

# Apply Mask to attention scores

# Assuming attention_score is your 4D tensor

# Select the first batch and the first head

# Create the DataFrame

# Calculate the V attention output

torch.Size([4, 4, 16, 16])

A = A.transpose(1, 2) # [4, 16, 4, 16] [batch_size, context_length, num_heads, head_size]

# Define the output weight matrix

torch.Size([4, 16, 64])

# Add residual connection

# Add Layer Normalization

# Define Feed Forward Network

# Add residual connection

logits = nn.Linear(d_model, max_token_value)(output)

7 8 9 ... 100059 100060 100061 100062 \

100063 100064 100065 100066 100067 100068

[16 rows x 100069 columns]

You might also like