0% found this document useful (0 votes)
5 views

Image Search Code Working

Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
5 views

Image Search Code Working

Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 5

import os

# Check if the dataset file exists and download it if it does not


dataset_zip = 'Image_Similarity_Search.zip'
dataset_folder = 'Image_Similarity_Search'

if not os.path.exists(dataset_zip) and not os.path.exists(dataset_folder):


!curl -L
https://github.com/raksaba/imagesearch/releases/download/v1.0.0/Image_Similarity_Se
arch.zip -O
!unzip -q -o Image_Similarity_Search.zip
elif not os.path.exists(dataset_folder):
!unzip -q -o Image_Similarity_Search.zip

--------------

# Install the torch, oracledb and transformers packages with pip
# (run through the notebook's shell escape).
! python -m pip install torch oracledb transformers

--------------

import torch
from transformers import ViTModel, ViTFeatureExtractor
from PIL import Image
import os
import csv
import oracledb
import numpy as np
import array
from tqdm import tqdm
import pandas as pd

# Load the pre-trained ViT model from the 'google/vit-base-patch16-224'
# checkpoint (ViTModel, i.e. without the final classification layer) and put
# it in evaluation mode, since it is only used for inference here.
model = ViTModel.from_pretrained('google/vit-base-patch16-224')
model.eval()

# Load the feature extractor for the same checkpoint. The checkpoint name
# must be a single unbroken string literal.
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

--------------

def extract_features(image_path):
    """Return the ViT [CLS] feature vector for the image at ``image_path``.

    The image is converted to RGB, preprocessed with the module-level
    ``feature_extractor`` and run through the module-level ``model`` with
    gradient tracking disabled.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        NumPy array with the [CLS] token representation taken from the
        model's last hidden state, squeezed of size-1 dimensions.
    """
    # convert() returns a new image, so the source file can be closed as soon
    # as the RGB copy exists instead of staying open until garbage collection.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # We use the [CLS] token representation as the feature vector
    features = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
    return features

--------------

# Database connection parameters.
# The connect string can be supplied through the ORACLE_DSN environment
# variable so that credentials do not have to be edited into the notebook;
# the original literal is kept as the fallback value.
# NOTE(review): the fallback literal looks like it was redacted when the
# notebook was published ("[email protected]") — confirm the real
# user/password@host value before relying on it.
dsn = os.environ.get('ORACLE_DSN', 'basbabu/[email protected]:1521/freepdb1')

# Connect to Oracle Database and open the cursor shared by the cells below.
connection = oracledb.connect(dsn)
cursor = connection.cursor()
# Rebuild the image_features table: drop any existing copy, then create it
# again. Each statement is followed by a commit.
_drop_table_sql = """DROP TABLE IF EXISTS image_features CASCADE CONSTRAINTS"""
_create_table_sql = """
CREATE TABLE IF NOT EXISTS image_features (
id NUMBER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
image_path VARCHAR2(500),
feature_vector VECTOR-- Assuming the vector datatype column
)
"""

for _ddl_statement in (_drop_table_sql, _create_table_sql):
    cursor.execute(_ddl_statement)
    connection.commit()

--------------

def numpy_converter_in(value):
    """Convert a NumPy array into an ``array.array`` for binding as a vector.

    Args:
        value: NumPy array; its ``dtype`` selects the ``array.array`` typecode.

    Returns:
        ``array.array`` holding the elements of ``value`` with typecode
        ``"d"`` for float64, ``"f"`` for float32, ``"B"`` for uint8 and
        ``"b"`` for every other dtype.
    """
    if value.dtype == np.float64:
        typecode = "d"
    elif value.dtype == np.float32:
        typecode = "f"
    elif value.dtype == np.uint8:
        # Unsigned bytes above 127 do not fit the signed "b" typecode and
        # would raise OverflowError there.
        typecode = "B"
    else:
        typecode = "b"
    return array.array(typecode, value)

def input_type_handler(cursor, value, arraysize):
    """Input type handler that binds NumPy arrays as VECTOR values.

    Args:
        cursor: Cursor on which the bind variable is created.
        value: The Python value being bound.
        arraysize: Array size passed through to ``cursor.var``.

    Returns:
        A cursor variable of type ``oracledb.DB_TYPE_VECTOR`` that converts
        the array with ``numpy_converter_in`` when ``value`` is a NumPy
        array; ``None`` for any other value.
    """
    if not isinstance(value, np.ndarray):
        return None
    return cursor.var(
        oracledb.DB_TYPE_VECTOR,
        arraysize=arraysize,
        inconverter=numpy_converter_in,
    )

# Register input_type_handler on the connection so that NumPy arrays passed
# as bind values are converted through numpy_converter_in.
connection.inputtypehandler = input_type_handler

--------------

# Ad-hoc notebook commands used to look around the working directory and to
# unpack the dataset archive by hand.
# NOTE(review): the bare `cd` / `ls` lines presumably rely on IPython
# automagic, and `cd` would change the kernel's working directory that the
# relative paths used later resolve against — confirm the intended directory
# before running the following cells.
!ls -ltr
cd Image_Similarity_Search
ls -ltr
cd imagesearch1-main/
ls -ltr
!unzip Image_Similarity_Search.zip
!pwd
!ls -ltr

---------------

# Function to insert image path and features into Oracle Database
def insert_into_oracle(image_path, features):
    """Insert a single image row into ``image_features`` and commit.

    Args:
        image_path: Value stored in the ``image_path`` column.
        features: Value bound to the ``feature_vector`` column.
    """
    # The SQL text is kept exactly as it was; only the Python structure
    # around it has been restored.
    cursor.execute("""
INSERT INTO image_features (image_path, feature_vector)
VALUES (:image_path, :feature_vector)
""", {'image_path': image_path, 'feature_vector': features})
    connection.commit()

# Function to insert image paths and features into Oracle Database in batch
def batch_insert_into_oracle(image_data):
    """Insert many image rows into ``image_features`` in one call and commit.

    Args:
        image_data: Sequence of ``(image_path, features)`` pairs, bound
            positionally to ``:1`` and ``:2``.

    Returns:
        None. An empty ``image_data`` is a no-op: nothing is sent to the
        database and no commit is issued.
    """
    if not image_data:
        # Nothing to insert; skip the round trip entirely.
        return None
    cursor.executemany("""
INSERT INTO image_features (image_path, feature_vector)
VALUES (:1, :2)
""", image_data)
    connection.commit()
    return None

# Define the base directory
base_dir = 'Image_Similarity_Search'

# Read the CSV file; its 'path' column lists the images to index.
csv_file = os.path.join(base_dir, 'reverse_image_search.csv')
df = pd.read_csv(csv_file)

image_data = []

# Process each image in the CSV file
for relative_path in tqdm(df['path'], desc='Processing images', unit='image'):
    # Remove only literal leading "./" and "/" prefixes. str.lstrip('./')
    # strips every leading "." or "/" character instead, which mangles names
    # such as ".hidden/x.jpg" or "../x.jpg".
    cleaned_path = relative_path
    while True:
        if cleaned_path.startswith('./'):
            cleaned_path = cleaned_path[2:]
        elif cleaned_path.startswith('/'):
            cleaned_path = cleaned_path[1:]
        else:
            break
    # Construct the full image path
    image_path = os.path.join(base_dir, cleaned_path)
    # Check if the file exists
    if os.path.exists(image_path):
        # Extract features
        features = extract_features(image_path)
        # Append the data to the list
        image_data.append((image_path, features))
    else:
        print(f"File not found: {image_path}")

# Insert into Oracle database (all collected rows in a single batch)
batch_insert_into_oracle(image_data)

---------------

def numpy_converter_out(value):
    """Convert a fetched ``array.array`` vector into a NumPy array.

    Args:
        value: ``array.array`` whose ``typecode`` selects the NumPy dtype.

    Returns:
        NumPy array with dtype int8 for typecode ``"b"``, uint8 for ``"B"``,
        float32 for ``"f"`` and float64 for anything else.
    """
    # The file imports NumPy as ``np``; the bare name ``numpy`` is undefined.
    if value.typecode == "b":
        dtype = np.int8
    elif value.typecode == "B":
        dtype = np.uint8
    elif value.typecode == "f":
        dtype = np.float32
    else:
        dtype = np.float64
    # asarray copies only when it has to, on both NumPy 1.x and 2.x, whereas
    # array(..., copy=False) raises on NumPy 2 if a copy is required.
    return np.asarray(value, dtype=dtype)

def output_type_handler(cursor, metadata):
    """Output type handler that fetches VECTOR columns as NumPy arrays.

    Args:
        cursor: Cursor on which the fetch variable is created.
        metadata: Column metadata; its ``type_code`` is inspected.

    Returns:
        A cursor variable that converts fetched values with
        ``numpy_converter_out`` when the column type is
        ``oracledb.DB_TYPE_VECTOR``; ``None`` for any other column type.
    """
    if metadata.type_code is not oracledb.DB_TYPE_VECTOR:
        return None
    return cursor.var(
        metadata.type_code,
        arraysize=cursor.arraysize,
        outconverter=numpy_converter_out,
    )
# Register output_type_handler on the connection so that fetched VECTOR
# columns are converted through numpy_converter_out.
connection.outputtypehandler = output_type_handler

---------------

def query_similar_images(query_image_path, top_n=5):
    """Return the stored image paths closest to the query image.

    The query image is embedded with ``extract_features`` and rows of
    ``image_features`` are ordered by cosine ``VECTOR_DISTANCE`` to it.

    Args:
        query_image_path: Path of the image to search with.
        top_n: Maximum number of rows to fetch.

    Returns:
        List of ``image_path`` values, in the order returned by the query.
    """
    query_vector = extract_features(query_image_path)

    cursor.execute("""
SELECT image_path
FROM image_features
ORDER BY VECTOR_DISTANCE(feature_vector, :query_vector, COSINE)
FETCH FIRST :top_n ROWS ONLY
""", {'query_vector': query_vector, 'top_n': top_n})

    return [row[0] for row in cursor.fetchall()]

# Example usage for querying data: look up the images most similar to one
# test image (default top_n of query_similar_images) and print their paths.


query_image_path = 'Image_Similarity_Search/test/Amitabh/Amitabh.jpg'
similar_images = query_similar_images(query_image_path)
print(similar_images)

---------------

import matplotlib.pyplot as plt

def display_images(query_image_path, similar_image_paths):
    """Show the query image followed by its similar images in a single row.

    Args:
        query_image_path: Path of the query image, drawn in the first column.
        similar_image_paths: Paths of the similar images, drawn in the
            following columns in the given order.
    """
    # One column for the query image plus one per similar image.
    n_columns = len(similar_image_paths) + 1

    # Display the query image. Each file is closed once it has been drawn.
    with Image.open(query_image_path) as query_img:
        plt.figure(figsize=(30, 10))
        plt.subplot(1, n_columns, 1)
        plt.imshow(query_img)
    plt.title("Query Image")
    plt.axis('off')

    # Display the similar images
    for rank, img_path in enumerate(similar_image_paths, start=1):
        with Image.open(img_path) as img:
            plt.subplot(1, n_columns, rank + 1)
            plt.imshow(img)
        plt.title(f"Similar Image {rank}")
        plt.axis('off')

    plt.show()

def query_and_display_similar_images(query_image_path, top_n=3):
    """Find the images most similar to the query image and display them.

    Args:
        query_image_path: Path of the image to search with.
        top_n: Maximum number of similar images to fetch and show.
    """
    # Reuse the shared query helper instead of repeating the same SQL here.
    similar_image_paths = query_similar_images(query_image_path, top_n=top_n)
    display_images(query_image_path, similar_image_paths)

# Example usage for querying and displaying data: search with one test image
# and plot it next to its matches (default top_n of the function).


query_image_path = 'Image_Similarity_Search/test/Rajini/Rajini.jpg'
query_and_display_similar_images(query_image_path)

---------------

# Close the cursor and connection (cursor first, then the connection it
# was created from).


cursor.close()
connection.close()

---------------

You might also like