CV Record
TECHNOLOGY
M.SC ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING
COMPUTER VISION LAB RECORD
NAME: Shamruthi R
Basic Image Operations and Geometric Transformations
Ex.no : 01
Date : 10/07/2023
Aim :
To read and display an image, convert a colour image into a grayscale image, perform scaling, rotation, and affine, Euclidean, and similarity transformations, and read a frame from a video and display it.
Code :
import cv2
from google.colab.patches import cv2_imshow

# Read the image (IMREAD_UNCHANGED keeps the file's original channels)
img = cv2.imread('/content/image_cv.jpg', cv2.IMREAD_UNCHANGED)
cv2_imshow(img)

# Convert the colour image to grayscale and display it
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2_imshow(gray_image)
import cv2
import numpy as np
from matplotlib import pyplot as plt

rows, cols, ch = img.shape

# Affine transformation: map three source points to three destination points.
# (pts2 is missing from the record; the values below are example destinations.)
pts1 = np.float32([[50, 50], [200, 50], [50, 200]])
pts2 = np.float32([[10, 100], [200, 50], [100, 250]])
M = cv2.getAffineTransform(pts1, pts2)
dst = cv2.warpAffine(img, M, (cols, rows))

plt.subplot(121)
plt.imshow(img)
plt.title('Input')
plt.subplot(122)
plt.imshow(dst)
plt.title('Output')
plt.show()
Rotation and scaling transformations
# Rotate the image by 30 degrees about its centre.
# With scale = 1 this is a Euclidean (rigid) transformation;
# a scale other than 1 makes it a similarity transformation.
M = cv2.getRotationMatrix2D((cols/2, rows/2), 30, 1)
dst = cv2.warpAffine(img, M, (cols, rows))

# Scale the rotated image to 50% of its original size
scale_percent = 50  # percent of original size
width = int(dst.shape[1] * scale_percent / 100)
height = int(dst.shape[0] * scale_percent / 100)
dim = (width, height)

# Resize image
resized = cv2.resize(dst, dim, interpolation=cv2.INTER_AREA)
print('Resized image with the dimensions of', resized.shape)
cv2_imshow(resized)
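The record does not include the video-reading step named in the aim; a minimal sketch, assuming a video at /content/sample.mp4:

import cv2
from google.colab.patches import cv2_imshow

# Open the video and read its first frame ('/content/sample.mp4' is an assumed path)
cap = cv2.VideoCapture('/content/sample.mp4')
ret, frame = cap.read()  # ret is False if no frame could be read
if ret:
    cv2_imshow(frame)
cap.release()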
Output :
Result :
The above code has been executed and the output has been verified.
Object Detection using a Pre-trained Model (YOLOv8)
Ex.no : 02
Date : 27/07/2023
Aim :
To perform object detection using a pre-trained model.
Code :
import os
import yaml
from shutil import copytree, ignore_patterns
import xml.etree.ElementTree as ET
from google.colab import drive
drive.mount('/content/drive')
root_path = '/content/drive/MyDrive/archive'
os.listdir(root_path)
train_data_path = os.path.join(root_path,'train_zip/train')
test_data_path = os.path.join(root_path,'test_zip/test')
'''All .xml and .jpg file names'''
train_data_description = os.listdir(train_data_path)
test_data_description = os.listdir(test_data_path)

'''train_annotation_file_paths and test_annotation_file_paths contain all .xml file paths;
train_image_file_paths and test_image_file_paths contain all .jpg file paths'''
train_annotation_file_paths = [os.path.join(train_data_path, i) for i in train_data_description if '.xml' in i]
train_image_file_paths = [os.path.join(train_data_path, i) for i in train_data_description if '.jpg' in i]
test_annotation_file_paths = [os.path.join(test_data_path, i) for i in test_data_description if '.xml' in i]
test_image_file_paths = [os.path.join(test_data_path, i) for i in test_data_description if '.jpg' in i]
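The convert_xml_to_txt helper is not preserved in the record apart from its image-size parsing fragment; a minimal reconstruction that writes Pascal VOC .xml annotations out as YOLO-format .txt labels:

def convert_xml_to_txt(data_path, xml_file, class_dict, dest_path):
    '''Convert one Pascal VOC .xml annotation into a YOLO-format .txt label file.'''
    tree = ET.parse(xml_file)
    root = tree.getroot()
    image_width = int(root.find('size/width').text)
    image_height = int(root.find('size/height').text)
    lines = []
    for obj in root.findall('object'):
        cls_id = class_dict[obj.find('name').text]
        box = obj.find('bndbox')
        xmin, ymin = float(box.find('xmin').text), float(box.find('ymin').text)
        xmax, ymax = float(box.find('xmax').text), float(box.find('ymax').text)
        # YOLO format: class x_centre y_centre width height, all normalised to [0, 1]
        xc = (xmin + xmax) / 2 / image_width
        yc = (ymin + ymax) / 2 / image_height
        w = (xmax - xmin) / image_width
        h = (ymax - ymin) / image_height
        lines.append(f'{cls_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}')
    out_name = os.path.splitext(os.path.basename(xml_file))[0] + '.txt'
    with open(os.path.join(dest_path, out_name), 'w') as f:
        f.write('\n'.join(lines))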
# Class-name to index mapping (matches the img_dict used for display below)
class_dict = {'apple': 0, 'banana': 1, 'orange': 2}

dest_path = '/content/drive/MyDrive/Reference/ObjectDetection/test/labels/'
for i in test_annotation_file_paths:
    convert_xml_to_txt(test_data_path, i, class_dict, dest_path)
yaml_path = "/content/drive/MyDrive/Reference/data.yaml"

from ultralytics import YOLO
# 'yolov8n.yaml' builds the model from its config; use 'yolov8n.pt' to start from pre-trained weights
model = YOLO('yolov8n.yaml')

'''Training the model'''
results = model.train(data=yaml_path, epochs=50, batch=4)

'''Predicting a single image to check how well the model works'''
Test_image_results = model('/content/drive/MyDrive/Reference/ObjectDetection/test/images/mixed_23.jpg')
Test_image_results[0].boxes.data.tolist()
import cv2
import matplotlib.pyplot as plt

'''Load the image'''
image = cv2.imread('/content/drive/MyDrive/archive/test_zip/test/mixed_23.jpg')
copy_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # RGB copy of the original, kept for reference
img_dict = {0: 'apple', 1: 'banana', 2: 'orange'}

'''Iterate over the bounding-box predictions'''
for bbox in Test_image_results[0].boxes.data.tolist():
    x1, y1, x2, y2, confidence, label = bbox
    '''Draw the rectangle'''
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    '''Add the class label above the box'''
    cv2.putText(image, img_dict[int(label)], (int(x1), int(y1 - 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

'''Convert the BGR image to RGB for matplotlib'''
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

'''Display the image using matplotlib'''
plt.imshow(image_rgb)
Output :
Result :
The above code has been executed and the output has been verified.
Histogram Equalization
Ex.no : 03
Date : 04/08/2023
Aim :
To implement the following transformations from scratch: a) read any image, convert it to grayscale, and display its histogram (library functions may be used); b) apply global histogram equalization to the image after converting it to a 2D array (hand-coded); c) display the converted image and its histogram, and compare the two.
Code :
a)
import cv2
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt

# Read the image ('image.jpg' is an assumed path; the record omits the read)
image = cv2.imread('image.jpg')

# Resizing the image for compatibility
image = cv2.resize(image, (500, 600))

# Convert to a PIL image, then to grayscale
img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
imgray = img.convert(mode='L')
img_array = np.asarray(imgray)

b)
# Histogram: counts of the 256 intensity levels, normalised to a PDF
histogram_array = np.bincount(img_array.flatten(), minlength=256)
num_pixels = np.sum(histogram_array)
histogram_array = histogram_array/num_pixels

# Cumulative distribution function (CDF)
chistogram_array = np.cumsum(histogram_array)

# Transformation map: scale the CDF to the [0, 255] intensity range
transform_map = np.floor(255 * chistogram_array).astype(np.uint8)

# Apply the map to every pixel and reshape back to the image shape
img_list = list(img_array.flatten())
eq_img_list = [transform_map[p] for p in img_list]
eq_img_array = np.reshape(np.asarray(eq_img_list), img_array.shape)

c)
# PDFs and CDFs of the original and equalized images for comparison
ori_pdf = histogram_array
ori_cdf = chistogram_array
eq_histogram_array = np.bincount(eq_img_array.flatten(), minlength=256)
num_pixels = np.sum(eq_histogram_array)
eq_pdf = eq_histogram_array/num_pixels
eq_cdf = np.cumsum(eq_pdf)

# Plot the PDFs
plt.figure()
plt.plot(ori_pdf)
plt.plot(eq_pdf)
plt.xlabel('Pixel intensity')
plt.ylabel('Distribution')
plt.legend(['Original', 'Equalized'])

# Plot the CDFs
plt.figure()
plt.plot(ori_cdf)
plt.plot(eq_cdf)
plt.xlabel('Pixel intensity')
plt.ylabel('Cumulative distribution')
plt.legend(['Original', 'Equalized'])

# Save the equalized image and reload it for display
save_filename = 'devasena-1.jpg'
eq_img = Image.fromarray(eq_img_array, mode='L')
eq_img.save(save_filename)
img = Image.open(save_filename)
Output :
Result :
The above code has been executed and the output has been verified.
Object Detection using Pre-Trained CNN models-Fruit DS
Ex.no : 04
Date : 11/08/2023
Aim :
To Implement object detection using a pretrained CNN model (e.g., SSD, YOLO, or Faster
R-CNN) on an image dataset containing multiple objects and Draw bounding boxes around the
detected objects and display the image with the identified objects.
Code :
!pip install -q torch_snippets lovely-tensors torchinfo

import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.ops import nms
from torch_snippets import Glob, Report
from torchinfo import summary
from xml.etree import ElementTree as et

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Define the root paths where the data is located in Google Drive
root = '/content/drive/MyDrive/CV_LAB/train'
val_root = '/content/drive/MyDrive/CV_LAB/test'  # validation split (assumed path)

# List of labels: the first is the background class, the rest are the fruits
labels = ['background', 'orange', 'apple', 'banana']
label2target = {l: t for t, l in enumerate(labels)}
targets2label = {t: l for l, t in label2target.items()}
num_classes = len(labels)
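Only two lines of the plot_img_bbox helper survive in the record; a minimal sketch consistent with how it is called at the end of this program:

def plot_img_bbox(img, bboxes):
    '''Show an image with a rectangle drawn for each (xmin, ymin, xmax, ymax) box.'''
    import matplotlib.patches as patches
    fig, a = plt.subplots(1, 1)
    a.imshow(img)
    for box in bboxes:
        xmin, ymin, xmax, ymax = box
        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=2, edgecolor='r', facecolor='none')
        a.add_patch(rect)
    plt.show()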
def preprocess_img(img):
    # HWC -> CHW tensor, as expected by torchvision detection models
    img = torch.tensor(img).permute(2, 0, 1)
    return img.float()

class FruitsDataset(Dataset):
    def __init__(self, root=root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.img_paths = sorted(Glob(self.root + '/*.jpg'))
        self.xml_paths = sorted(Glob(self.root + '/*.xml'))

    def __len__(self):
        return len(self.img_paths)
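    # The record omits __getitem__ and collate_fn, both required by the
    # DataLoaders below; a minimal sketch in the Faster R-CNN target format:
    def __getitem__(self, ix):
        img = np.array(Image.open(self.img_paths[ix]).convert('RGB')) / 255.
        tree = et.parse(self.xml_paths[ix])
        boxes, labs = [], []
        for obj in tree.getroot().findall('object'):
            labs.append(label2target[obj.find('name').text])
            bb = obj.find('bndbox')
            boxes.append([int(bb.find(c).text) for c in ['xmin', 'ymin', 'xmax', 'ymax']])
        target = {'boxes': torch.tensor(boxes).float(),
                  'labels': torch.tensor(labs).long()}
        return preprocess_img(img), target

    def collate_fn(self, batch):
        # Detection models take lists of images/targets, not stacked tensors
        return tuple(zip(*batch))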
train_ds = FruitsDataset()
train_dl = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=train_ds.collate_fn)
val_ds = FruitsDataset(root=val_root)
val_dl = DataLoader(val_ds, batch_size=2, shuffle=True, collate_fn=val_ds.collate_fn)
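The get_model helper is not preserved in the record; the standard torchvision recipe for fine-tuning a COCO-pre-trained Faster R-CNN, matching the FastRCNNPredictor import above, is a reasonable reconstruction:

def get_model():
    # Load a Faster R-CNN pre-trained on COCO and replace its box-predictor
    # head with one sized for our classes (background + 3 fruits)
    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model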
model = get_model().to(device)

# Sanity check: in train mode the model returns its loss dictionary for one batch
imgs, targets = next(iter(train_dl))
imgs = [img.to(device) for img in imgs]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
print(model(imgs, targets))

summary(model, (1, 3, 224, 224))
def train_batch(batch, model, optim):
    model.train()
    imgs, targets = batch
    imgs = [img.to(device) for img in imgs]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    optim.zero_grad()
    losses = model(imgs, targets)  # dict of the four detection losses
    loss = sum(loss for loss in losses.values())
    loss.backward()
    optim.step()
    return loss, losses

@torch.no_grad()
def validate_batch(batch, model, optim):
    # model.train() is kept so the forward pass returns the loss dict
    # (in eval mode a detection model returns predictions instead)
    model.train()
    imgs, targets = batch
    imgs = [img.to(device) for img in imgs]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    optim.zero_grad()
    losses = model(imgs, targets)
    loss = sum(loss for loss in losses.values())
    return loss, losses
model = get_model().to(device)
optim = torch.optim.SGD(model.parameters(), lr=0.005,
                        weight_decay=5e-4, momentum=0.9)
n_epochs = 1
log = Report(n_epochs)

for e in range(n_epochs):
    for i, batch in enumerate(train_dl):
        N = len(train_dl)
        loss, losses = train_batch(batch, model, optim)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in ['loss_classifier', 'loss_box_reg',
                                 'loss_objectness', 'loss_rpn_box_reg']]
        log.record(e + (i+1)/N, trn_loss=loss.item(), trn_loc_loss=loc_loss.item(),
                   trn_regr_loss=regr_loss.item(),
                   trn_loss_objectness=loss_objectness.item(),
                   trn_loss_rpn_box_reg=loss_rpn_box_reg.item())
    for i, batch in enumerate(val_dl):
        N = len(val_dl)
        loss, losses = validate_batch(batch, model, optim)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in ['loss_classifier', 'loss_box_reg',
                                 'loss_objectness', 'loss_rpn_box_reg']]
        log.record(e + (i+1)/N, val_loss=loss.item(), val_loc_loss=loc_loss.item(),
                   val_regr_loss=regr_loss.item(),
                   val_loss_objectness=loss_objectness.item(),
                   val_loss_rpn_box_reg=loss_rpn_box_reg.item())
    log.report_avgs(e+1)

log.plot_epochs(['trn_loss', 'val_loss'])
def decode_output(output):
    '''Convert raw model output into lists of boxes, confidences, and class names.'''
    bbs = output['boxes'].cpu().detach().numpy().astype(np.uint16)
    labels = np.array([targets2label[i] for i in output['labels'].cpu().detach().numpy()])
    confs = output['scores'].cpu().detach().numpy()
    # Non-maximum suppression to drop heavily overlapping boxes
    idxs = nms(torch.tensor(bbs.astype(np.float32)), torch.tensor(confs), 0.05)
    bbs, confs, labels = [tensor[idxs] for tensor in [bbs, confs, labels]]
    if len(idxs) == 1:
        bbs, confs, labels = [np.array([tensor]) for tensor in [bbs, confs, labels]]
    return bbs.tolist(), confs.tolist(), labels.tolist()
model.eval()
for i, (images, targets) in enumerate(val_dl):
    if i == 3: break
    # Move the batch images to the device and run inference
    images = [img.to(device) for img in images]
    outputs = model(images)
    for j, output in enumerate(outputs):
        bbs, confs, labels = decode_output(output)
        plot_img_bbox(images[j].cpu().permute(1, 2, 0), bbs)
Output :
Result :
The above code has been executed and the output has been verified.
Image enhancement-Linear filter operations
Ex.no : 05
Date : 24/08/2023
Aim :
To apply linear filter operations on a given image and display the output: filters for blurring (box, Gaussian, median, bilateral), filters for sharpening (Laplacian, unsharp mask, Sobel or Prewitt), and noise-reduction filters.
Code :
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
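Only the imports survive in the record for this experiment; a minimal sketch of the filter bank the aim describes, assuming an input image at /content/image.jpg:

# Read the input image ('/content/image.jpg' is an assumed path)
img = cv2.imread('/content/image.jpg')

# --- Blurring filters ---
box = cv2.blur(img, (5, 5))                      # box (mean) filter
gaussian = cv2.GaussianBlur(img, (5, 5), 0)      # Gaussian filter
median = cv2.medianBlur(img, 5)                  # median filter (also removes salt-and-pepper noise)
bilateral = cv2.bilateralFilter(img, 9, 75, 75)  # edge-preserving smoothing

# --- Sharpening / edge filters ---
laplacian = cv2.Laplacian(img, cv2.CV_64F)          # second-derivative edges
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)  # Sobel gradient along x
# Unsharp mask: original + (original - blurred)
unsharp = cv2.addWeighted(img, 1.5, gaussian, -0.5, 0)

for name, out in [('Box', box), ('Gaussian', gaussian), ('Median', median),
                  ('Bilateral', bilateral), ('Unsharp', unsharp)]:
    print(name)
    cv2_imshow(out)

# convertScaleAbs maps the float edge responses back to displayable uint8
cv2_imshow(cv2.convertScaleAbs(laplacian))
cv2_imshow(cv2.convertScaleAbs(sobelx))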
Output :
Result :
The above code has been executed and the output has been verified.
Morphological Operators in Binary Image processing
Ex.no : 06
Date : 24/08/2023
Aim :
To read any image, convert it to a binary image, and a) apply the four basic morphological operators (erosion, dilation, opening, and closing) without using built-in functions and display the converted image; b) perform the same operations using OpenCV library functions and display the output.
Code :
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Read the image as grayscale and threshold it to binary, as the aim requires
img1 = cv2.imread("download1.png", 0)
_, img1 = cv2.threshold(img1, 127, 255, cv2.THRESH_BINARY)

# Acquire size of the image
m, n = img1.shape

# Show the image
plt.imshow(img1, cmap="gray")
# k = 11, 15, 45 - different sizes of the structuring element
k = 11
SE = np.ones((k, k), dtype=np.uint8)
constant = (k-1)//2

# Define new (output) image
imgErode = np.zeros((m, n), dtype=np.uint8)

# Erosion without using the inbuilt cv2 morphology function:
# each output pixel is the minimum over the neighbourhood selected by the SE
for i in range(constant, m-constant):
    for j in range(constant, n-constant):
        temp = img1[i-constant:i+constant+1, j-constant:j+constant+1]
        product = temp*SE
        imgErode[i, j] = np.min(product)

plt.imshow(imgErode, cmap="gray")
cv2.imwrite("Eroded3.png", imgErode)
img2 = cv2.imread("download1.png", 0)
_, img2 = cv2.threshold(img2, 127, 255, cv2.THRESH_BINARY)

# Acquire size of the image
p, q = img2.shape

# Show the image
plt.imshow(img2, cmap="gray")

# Define new image to store the pixels of the dilated image
imgDilate = np.zeros((p, q), dtype=np.uint8)

# Define a cross-shaped structuring element
SED = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
constant1 = 1

# Dilation without using the inbuilt cv2 function:
# each output pixel is the maximum over the neighbourhood selected by the SE
for i in range(constant1, p-constant1):
    for j in range(constant1, q-constant1):
        temp = img2[i-constant1:i+constant1+1, j-constant1:j+constant1+1]
        product = temp*SED
        imgDilate[i, j] = np.max(product)

plt.imshow(imgDilate, cmap="gray")
def erosion(img, SE):
    imgErode = cv2.erode(img, SE, iterations=1)
    return imgErode

def dilation(img, SE):
    imgDilate = cv2.dilate(img, SE, iterations=1)
    return imgDilate

img = cv2.imread("download1.png", 0)
cv2.imwrite("whaat.png", img)
SE = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

# Opening = erosion then dilation; closing = dilation then erosion
AeB = erosion(img, SE)        # E(A,B): eroded image
AoB = dilation(AeB, SE)       # O(A,B): opening
AoBdB = dilation(AoB, SE)     # D(O(A,B),B): opening followed by dilation
AoBdBeB = erosion(AoBdB, SE)  # C(O(A,B),B): closing of the opening

plt.subplot(3, 2, 1)
plt.title("Original")
plt.imshow(img, cmap="gray")
plt.subplot(3, 2, 2)
plt.title("E(A,B)")
plt.imshow(AeB, cmap="gray")
plt.subplot(3, 2, 3)
plt.title("O(A,B)")
plt.imshow(AoB, cmap="gray")
plt.subplot(3, 2, 4)
plt.title("D(O(A,B),B)")
plt.imshow(AoBdB, cmap="gray")
plt.subplot(3, 2, 5)
plt.title("C(O(A,B),B)")
plt.imshow(AoBdBeB, cmap="gray")
Output :
Result :
The above code has been executed and the output has been verified.
Application of Filters on videos
Ex.no : 07
Date : 24/08/2023
Aim :
To Enhance a video using different filters such as blurring filters (box and Gaussian), edge
detection (Canny), and sharpening (unsharp mask) to each frame of the video and then combines
the original and processed frames into a single output video.
Code :
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
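The record loses the setup for the loop below (the apply_filters helper, the capture, and the writer); a minimal sketch consistent with how they are used, with assumed file names input.mp4 and output.mp4:

def apply_filters(frame):
    '''Return box-blurred, Gaussian-blurred, Canny-edge, and unsharp-masked versions of a frame.'''
    box_blurred = cv2.blur(frame, (5, 5))
    gaussian_blurred = cv2.GaussianBlur(frame, (5, 5), 0)
    edges = cv2.Canny(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), 100, 200)
    unsharp = cv2.addWeighted(frame, 1.5, gaussian_blurred, -0.5, 0)
    return box_blurred, gaussian_blurred, edges, unsharp

cap = cv2.VideoCapture('input.mp4')
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# The output holds five half-size frames stacked side by side
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, fps, (5 * (width // 2), height // 2))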
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process the frame
    box_blurred_frame, gaussian_blurred_frame, edges_frame, unsharp_mask_frame = apply_filters(frame)

    # Resize frames before stacking
    resized_frame = cv2.resize(frame, (width // 2, height // 2))
    resized_box_blurred_frame = cv2.resize(box_blurred_frame, (width // 2, height // 2))
    resized_gaussian_blurred_frame = cv2.resize(gaussian_blurred_frame, (width // 2, height // 2))
    resized_edges_frame = cv2.resize(edges_frame, (width // 2, height // 2))
    resized_unsharp_mask_frame = cv2.resize(unsharp_mask_frame, (width // 2, height // 2))

    # Convert the grayscale edge frame to a three-channel image
    resized_edges_frame = cv2.cvtColor(resized_edges_frame, cv2.COLOR_GRAY2BGR)

    # Stack the resized frames horizontally
    combined_frame = np.hstack((resized_frame, resized_box_blurred_frame,
                                resized_gaussian_blurred_frame, resized_edges_frame,
                                resized_unsharp_mask_frame))

    # Write the combined frame to the output video
    out.write(combined_frame)
    cv2_imshow(combined_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release video objects and close windows
cap.release()
out.release()
cv2.destroyAllWindows()
Output :
Result :
The above code has been executed and the output has been verified.
Moving Object Detection
Ex.no : 08
Date : 24/08/2023
Aim :
To implement a pre-trained model to detect moving objects, such as persons or vehicles, in a video.
Code :
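No code survives in the record for this experiment; a minimal sketch using the pre-trained YOLOv8 weights from the Ultralytics package used in Ex. 02 (the video path traffic.mp4 and the class choices are assumptions):

import cv2
from ultralytics import YOLO
from google.colab.patches import cv2_imshow

# Load a model pre-trained on COCO (class 0 = person, class 2 = car)
model = YOLO('yolov8n.pt')

cap = cv2.VideoCapture('traffic.mp4')  # assumed input video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Detect only persons (0) and cars (2) in the current frame
    results = model(frame, classes=[0, 2])
    # plot() draws the predicted boxes and labels onto the frame
    annotated = results[0].plot()
    cv2_imshow(annotated)
cap.release()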
Output :
Result :
The above code has been executed and the output has been verified.
Alpha Blending and Masking
Ex.no : 09
Date : 24/08/2023
Aim :
To consider any two images and blend them using alpha blending to create a new image, and to extract regions of an image using masking.
Code :
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
# Load the two images you want to blend
image1 = cv2.imread('cat.jpg')  # replace 'cat.jpg' with the path to your first image
image2 = cv2.imread('dog.jpg')  # replace 'dog.jpg' with the path to your second image

# Resize the images to the same dimensions (addWeighted requires equal shapes)
width = 400   # set your desired width
height = 300  # set your desired height
image1 = cv2.resize(image1, (width, height))
image2 = cv2.resize(image2, (width, height))

# Display the original images
cv2_imshow(image1)
cv2_imshow(image2)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Set the alpha, beta, and gamma values for blending:
# addWeighted computes  blended = alpha*image1 + beta*image2 + gamma
alpha = 0.5  # weight of the first image
beta = 0.5   # weight of the second image
gamma = 0    # scalar added to each pixel

# Blend the images using alpha blending
blended_image = cv2.addWeighted(image1, alpha, image2, beta, gamma)

# Display the blended image
cv2_imshow(blended_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Rectangular mask: keep only the pixels inside the white rectangle
image = cv2.imread("cat.jpg")
cv2_imshow(image)
mask = np.zeros(image.shape[:2], dtype="uint8")
cv2.rectangle(mask, (0, 90), (290, 450), 255, -1)
cv2_imshow(mask)
masked = cv2.bitwise_and(image, image, mask=mask)
cv2_imshow(masked)
cv2.waitKey(0)

# Circular mask
mask = np.zeros(image.shape[:2], dtype="uint8")
cv2.circle(mask, (145, 200), 100, 255, -1)
masked = cv2.bitwise_and(image, image, mask=mask)

# Show the output images
cv2_imshow(mask)
cv2_imshow(masked)
cv2.waitKey(0)
from PIL import Image

image = Image.open('dog.jpg').convert('RGBA')
alpha_mask = Image.open('cat.jpg').convert('L')
# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
alpha_mask = alpha_mask.resize(image.size, Image.LANCZOS)
image.putalpha(alpha_mask)
image.save('masked_image.png')
Output :
Result :
The above code has been executed and the output has been verified.
Feature Detection (Corner Detection)
Ex.no : 10
Date : 24/08/2023
Aim :
To implement various corner detection algorithms, such as Harris corner, Shi-Tomasi, SIFT, and SURF, on an image to detect its corners.
Code :
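No code survives in the record for this experiment; a minimal sketch of Harris, Shi-Tomasi, and SIFT detection with OpenCV, assuming an image at /content/chess.jpg (SURF is patented and absent from stock OpenCV builds, so it is omitted):

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

img = cv2.imread('/content/chess.jpg')  # assumed path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Harris corner detection: mark strong corner responses in red
harris = cv2.cornerHarris(np.float32(gray), blockSize=2, ksize=3, k=0.04)
img_harris = img.copy()
img_harris[harris > 0.01 * harris.max()] = [0, 0, 255]
cv2_imshow(img_harris)

# Shi-Tomasi: the N strongest corners via goodFeaturesToTrack
corners = cv2.goodFeaturesToTrack(gray, maxCorners=50, qualityLevel=0.01, minDistance=10)
img_shi = img.copy()
for c in corners:
    x, y = c.ravel().astype(int)
    cv2.circle(img_shi, (x, y), 4, (0, 255, 0), -1)
cv2_imshow(img_shi)

# SIFT keypoints (detects corners and blobs with scale and orientation)
sift = cv2.SIFT_create()
keypoints = sift.detect(gray, None)
img_sift = cv2.drawKeypoints(img, keypoints, None,
                             flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv2_imshow(img_sift)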
Output :
Result :
The above code has been executed and the output has been verified.
Image Matching
Ex.no : 11
Date : 24/08/2023
Aim :
To read an input (query) image and match it against a train (scene) image using brute-force matching with ORB descriptors in OpenCV. The output image displays the top matched keypoints between the two images.
Code :
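Only two lines of this program survive; a minimal sketch of the missing setup, matching the detector and BF_FeatureMatcher names used below (the image paths are assumptions):

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Read the query and train images as grayscale ('query.jpg' and 'train.jpg' are assumed paths)
gray_pic1 = cv2.imread('query.jpg', 0)
gray_pic2 = cv2.imread('train.jpg', 0)

def detector(image1, image2):
    '''Detect ORB keypoints and compute descriptors for both images.'''
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(image1, None)
    kp2, des2 = orb.detectAndCompute(image2, None)
    return kp1, des1, kp2, des2

def BF_FeatureMatcher(des1, des2):
    '''Brute-force matching with Hamming distance, the right metric for binary ORB descriptors.'''
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des1, des2)
    return sorted(matches, key=lambda m: m.distance)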
# Store the detected key points and descriptors of both images
key_pt1, descrip1, key_pt2, descrip2 = detector(gray_pic1, gray_pic2)

# Sort the best matches obtained from the brute-force matcher
number_of_matches = BF_FeatureMatcher(descrip1, descrip2)
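The display step is also missing from the record; a short sketch (the choice of 15 matches is arbitrary):

# Draw the top matched keypoints between the query and train images
output = cv2.drawMatches(gray_pic1, key_pt1, gray_pic2, key_pt2,
                         number_of_matches[:15], None, flags=2)
cv2_imshow(output)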
Output :
Result :
The above code has been executed and the output has been verified.
Hough Transform
Ex.no : 12
Date : 24/08/2023
Aim :
To consider any image, apply the Hough transform to detect shapes in it, and display the output.
Code :
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
img_path="/content/th.jpg"
image=cv2.imread(img_path)
# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
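The record stops after the grayscale conversion; a minimal sketch of the Hough line and circle detection the aim asks for (all thresholds are example values):

# Detect edges first; Hough line detection works on an edge map
edges = cv2.Canny(gray, 50, 150)

# Probabilistic Hough transform for line segments
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80,
                        minLineLength=30, maxLineGap=10)
if lines is not None:
    for x1, y1, x2, y2 in lines[:, 0]:
        cv2.line(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

# Hough transform for circles
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1, minDist=30,
                           param1=100, param2=40, minRadius=10, maxRadius=100)
if circles is not None:
    for x, y, r in np.uint16(np.around(circles))[0]:
        cv2.circle(image, (x, y), r, (0, 0, 255), 2)

cv2_imshow(image)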
Output :
Result :
The above code has been executed and the output has been verified.
Contour Detection and Drawing
Ex.no : 13
Date : 24/08/2023
Aim :
To detect contours in an image, draw the detected contours on the image, and display the output.
Code :
import cv2
import numpy as np
from google.colab.patches import cv2_imshow # Import cv2_imshow for Google Colab
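Only the imports survive in the record; a minimal sketch of contour detection and drawing, assuming an image at /content/shapes.jpg:

# Read the image and threshold it to binary ('/content/shapes.jpg' is an assumed path)
image = cv2.imread('/content/shapes.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Find the contours in the binary image
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
print('Number of contours found:', len(contours))

# Draw all contours (-1) in green on a copy of the image
output = image.copy()
cv2.drawContours(output, contours, -1, (0, 255, 0), 2)
cv2_imshow(output)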
Output :
Result :
The above code has been executed and the output has been verified.
Image segmentation using OpenCV
Ex.no : 14
Date : 24/08/2023
Aim :
To consider an image or a frame from a video, apply thresholding and contour detection, and segment the image.
Code :
import matplotlib.pyplot as plt
import numpy as np
import cv2

sample_image = cv2.imread('image.jpg')
img = cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (256, 256))
plt.axis('off')
plt.imshow(img)

# Threshold the grayscale image to a binary map (the record omits this step)
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Edge detection on the binary map, dilated to close small gaps
edges = cv2.dilate(cv2.Canny(thresh, 0, 255), None)
plt.axis('off')
plt.imshow(edges)
# Keep the largest contour by area and fill it as a mask
cnt = sorted(cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2],
             key=cv2.contourArea)[-1]
mask = np.zeros((256, 256), np.uint8)
masked = cv2.drawContours(mask, [cnt], -1, 255, -1)
plt.axis('off')
plt.imshow(masked)

# Apply the mask: keep only the pixels inside the largest contour
# (img is already RGB, so no further colour conversion is needed)
segmented = cv2.bitwise_and(img, img, mask=mask)
plt.imshow(segmented)
Output :
Result :
The above code has been executed and the output has been verified.