HPC_codes

The document contains code examples demonstrating parallel algorithms in Python and CUDA: graph traversal (DFS and BFS), sorting (bubble sort and merge sort), and matrix/vector operations. The sorting experiments pair a sequential implementation with a parallel one and compare their running times, and the output of each program is shown after its code.

EXP 1

CODE:

import threading
from queue import Queue, Empty

class Graph:
    def __init__(self, V):
        self.V = V
        self.adj = [[] for _ in range(V)]
        self.lock = threading.Lock()  # Lock for thread-safe access to visited

    def add_edge(self, v, w):
        self.adj[v].append(w)

    # Parallel Depth-First Search
    def parallel_dfs(self, start_vertex):
        visited = [False] * self.V

        def dfs_util(v):
            with self.lock:
                if visited[v]:
                    return
                visited[v] = True
                print(v, end=" ")
            # Start a thread for each unvisited adjacent node
            local_threads = []
            for n in self.adj[v]:
                if not visited[n]:
                    t = threading.Thread(target=dfs_util, args=(n,))
                    local_threads.append(t)
                    t.start()
            # Wait for all child threads to finish
            for t in local_threads:
                t.join()

        # Start DFS from the initial node
        t = threading.Thread(target=dfs_util, args=(start_vertex,))
        t.start()
        t.join()

    # Parallel Breadth-First Search
    def parallel_bfs(self, start_vertex):
        visited = [False] * self.V
        q = Queue()
        q.put(start_vertex)
        visited[start_vertex] = True

        while not q.empty():
            q_size = q.qsize()
            threads = []

            def process_node():
                while True:
                    # Non-blocking get: q.get() after an empty() check can
                    # deadlock if another thread drains the queue first
                    try:
                        v = q.get_nowait()
                    except Empty:
                        break
                    print(v, end=" ")
                    # Add unvisited adjacent nodes to the queue
                    for n in self.adj[v]:
                        with self.lock:
                            if not visited[n]:
                                visited[n] = True
                                q.put(n)

            # Launch one thread per node currently in the queue
            for _ in range(q_size):
                t = threading.Thread(target=process_node)
                threads.append(t)
                t.start()
            for t in threads:
                t.join()

# Create a graph
g = Graph(7)
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(1, 3)
g.add_edge(1, 4)
g.add_edge(2, 5)
g.add_edge(2, 6)

"""
        0
       / \
      v   v
      1   2
     / \ / \
    v  v v  v
    3  4 5  6
"""

print("Depth-First Search (DFS): ", end="")
g.parallel_dfs(0)
print("\nBreadth-First Search (BFS): ", end="")
g.parallel_bfs(0)
print()

OUTPUT:

Depth-First Search (DFS): 0 1 3 2 4 5 6
Breadth-First Search (BFS): 0 1 2 3 4 5 6
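
EXP 1 only includes the threaded traversals. As a baseline for comparison, a plain sequential DFS and BFS over the same Graph class might look like the sketch below (the names sequential_dfs and sequential_bfs are illustrative, not part of the original code):

from collections import deque

def sequential_dfs(g, v, visited=None):
    # Plain recursive DFS over the same adjacency list
    if visited is None:
        visited = [False] * g.V
    visited[v] = True
    print(v, end=" ")
    for n in g.adj[v]:
        if not visited[n]:
            sequential_dfs(g, n, visited)

def sequential_bfs(g, start):
    # Plain queue-based BFS
    visited = [False] * g.V
    q = deque([start])
    visited[start] = True
    while q:
        v = q.popleft()
        print(v, end=" ")
        for n in g.adj[v]:
            if not visited[n]:
                visited[n] = True
                q.append(n)

Because of CPython's global interpreter lock, the threaded versions do not actually execute this CPU-bound work in parallel, so the sequential versions are typically faster for graphs of this size.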


EXP 2

CODE:

import time
import multiprocessing

# Sequential Bubble Sort
def bubble_sort(arr):
    n = len(arr)
    for i in range(n - 1):
        for j in range(n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]

# Parallel Odd-Even Bubble Sort
def parallel_bubble_sort(arr):
    n = len(arr)

    def odd_even_pass(is_odd):
        """Compare-exchange the odd- or even-indexed pairs (one pass)."""
        for j in range(1 if is_odd else 2, n, 2):
            if arr[j] < arr[j - 1]:
                arr[j], arr[j - 1] = arr[j - 1], arr[j]

    # NOTE: each Process receives a copy of arr, so swaps made in the
    # children are not visible in the parent; arr_parallel is therefore
    # printed unchanged below.
    for _ in range(n):  # Run multiple passes
        # Odd index pass
        p1 = multiprocessing.Process(target=odd_even_pass, args=(True,))
        p1.start()
        p1.join()
        # Even index pass
        p2 = multiprocessing.Process(target=odd_even_pass, args=(False,))
        p2.start()
        p2.join()

# Print array
def print_array(arr):
    print(" ".join(map(str, arr)))

if __name__ == "__main__":
    n = 10
    arr = list(range(n, 0, -1))  # Create an array from 10 down to 1
    arr_parallel = arr.copy()  # Copy for parallel sorting

    # Sequential sorting time
    start_time = time.time()
    bubble_sort(arr)
    end_time = time.time()
    print("Sequential Bubble Sort took:", round(end_time - start_time, 6), "seconds")
    print_array(arr)

    # Parallel sorting time
    start_time = time.time()
    parallel_bubble_sort(arr_parallel)
    end_time = time.time()
    print("Parallel Bubble Sort took:", round(end_time - start_time, 6), "seconds")
    print_array(arr_parallel)

OUTPUT:

Sequential Bubble Sort took: 1e-05 seconds
1 2 3 4 5 6 7 8 9 10
Parallel Bubble Sort took: 0.108825 seconds
10 9 8 7 6 5 4 3 2 1
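
The parallel output above is unsorted because each multiprocessing.Process operates on its own copy of the array. A minimal sketch of a working variant, assuming a shared-memory multiprocessing.Array in place of a plain list (this code is not part of the original experiment):

import multiprocessing

def odd_even_pass(shared, n, is_odd):
    # Compare-exchange the odd- or even-indexed pairs in shared memory
    for j in range(1 if is_odd else 2, n, 2):
        if shared[j] < shared[j - 1]:
            shared[j], shared[j - 1] = shared[j - 1], shared[j]

if __name__ == "__main__":
    n = 10
    shared = multiprocessing.Array('i', range(n, 0, -1))  # visible to all processes
    for _ in range(n):
        for is_odd in (True, False):
            p = multiprocessing.Process(target=odd_even_pass, args=(shared, n, is_odd))
            p.start()
            p.join()
    print(" ".join(map(str, shared[:])))  # 1 2 3 4 5 6 7 8 9 10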
EXP 3

CODE:

import time
import multiprocessing

# Merge function
def merge(arr, low, mid, high):
    left = arr[low:mid + 1]
    right = arr[mid + 1:high + 1]
    i = j = 0
    k = low
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            arr[k] = left[i]
            i += 1
        else:
            arr[k] = right[j]
            j += 1
        k += 1
    while i < len(left):
        arr[k] = left[i]
        i += 1
        k += 1
    while j < len(right):
        arr[k] = right[j]
        j += 1
        k += 1

# Sequential Merge Sort
def merge_sort(arr, low, high):
    if low < high:
        mid = (low + high) // 2
        merge_sort(arr, low, mid)
        merge_sort(arr, mid + 1, high)
        merge(arr, low, mid, high)

# Parallel Merge Sort
# NOTE: each Process recursively sorts a copy of arr, so the children's
# work is not visible in the parent; only the timings are comparable.
def parallel_merge_sort(arr, low, high):
    if low < high:
        mid = (low + high) // 2
        left_process = multiprocessing.Process(target=parallel_merge_sort, args=(arr, low, mid))
        right_process = multiprocessing.Process(target=parallel_merge_sort, args=(arr, mid + 1, high))
        left_process.start()
        right_process.start()
        left_process.join()
        right_process.join()
        merge(arr, low, mid, high)

# Main function
if __name__ == "__main__":
    n = 10
    arr = list(range(n, 0, -1))  # Create an array from 10 down to 1
    arr_parallel = arr.copy()  # Copy for parallel sorting

    # Sequential sorting time
    start_time = time.time()
    merge_sort(arr, 0, n - 1)
    end_time = time.time()
    print("Time taken by sequential algorithm:", round(end_time - start_time, 6), "seconds")

    # Parallel sorting time
    start_time = time.time()
    parallel_merge_sort(arr_parallel, 0, n - 1)
    end_time = time.time()
    print("Time taken by parallel algorithm:", round(end_time - start_time, 6), "seconds")

OUTPUT:

Time taken by sequential algorithm: 2.1e-05 seconds
Time taken by parallel algorithm: 0.126153 seconds
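
As in EXP 2, the spawned processes sort copies of the array, so arr_parallel is not actually sorted when the parent merges; the timings above mainly measure process-creation overhead. A minimal sketch of a two-worker variant that does produce a sorted result, assuming a multiprocessing.Pool and an illustrative merge_lists helper (neither is part of the original code):

import multiprocessing

def merge_lists(left, right):
    # Standard two-way merge of two sorted lists
    out, i, j = [], 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            out.append(left[i])
            i += 1
        else:
            out.append(right[j])
            j += 1
    return out + left[i:] + right[j:]

if __name__ == "__main__":
    arr = list(range(10, 0, -1))
    mid = len(arr) // 2
    with multiprocessing.Pool(2) as pool:
        # Each worker sorts one half and returns the sorted copy
        left, right = pool.map(sorted, [arr[:mid], arr[mid:]])
    print(merge_lists(left, right))  # [1, 2, ..., 10]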


EXP 4

CODE:

#include <iostream>
#include <cuda_runtime.h>

using namespace std;

// CUDA Kernel for 3x3 Matrix Multiplication
__global__ void matrixMultiply(int *A, int *B, int *C) {
    int row = threadIdx.y;
    int col = threadIdx.x;
    if (row < 3 && col < 3) {
        int sum = 0;
        for (int k = 0; k < 3; k++) {
            sum += A[row * 3 + k] * B[k * 3 + col];
        }
        C[row * 3 + col] = sum;
    }
}

// CUDA Kernel for Vector Addition (Size 3)
__global__ void vectorAdd(int *V1, int *V2, int *V3) {
    int i = threadIdx.x;
    if (i < 3) {
        V3[i] = V1[i] + V2[i];
    }
}

// Function to print a 3x3 matrix
void printMatrix(int *M) {
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            cout << M[i * 3 + j] << " ";
        }
        cout << endl;
    }
}

// Function to print a vector of size 3
void printVector(int *V) {
    for (int i = 0; i < 3; i++) {
        cout << V[i] << " ";
    }
    cout << endl;
}

int main() {
    // Host matrices and vectors
    int h_A[3][3] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
    int h_B[3][3] = {{9, 8, 7}, {6, 5, 4}, {3, 2, 1}};
    int h_C[3][3];
    int h_V1[3] = {1, 2, 3};
    int h_V2[3] = {4, 5, 6};
    int h_V3[3];

    // Device memory pointers
    int *d_A, *d_B, *d_C;
    int *d_V1, *d_V2, *d_V3;

    // Allocate memory on GPU
    cudaMalloc((void**)&d_A, 9 * sizeof(int));
    cudaMalloc((void**)&d_B, 9 * sizeof(int));
    cudaMalloc((void**)&d_C, 9 * sizeof(int));
    cudaMalloc((void**)&d_V1, 3 * sizeof(int));
    cudaMalloc((void**)&d_V2, 3 * sizeof(int));
    cudaMalloc((void**)&d_V3, 3 * sizeof(int));

    // Copy data from Host to Device
    cudaMemcpy(d_A, h_A, 9 * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, h_B, 9 * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_V1, h_V1, 3 * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_V2, h_V2, 3 * sizeof(int), cudaMemcpyHostToDevice);

    // Define CUDA execution configuration
    dim3 threadsPerBlock(3, 3);  // 3x3 threads for matrix multiplication
    dim3 threadsPerVector(3);    // 3 threads for vector addition

    // Launch the kernels
    matrixMultiply<<<1, threadsPerBlock>>>(d_A, d_B, d_C);
    vectorAdd<<<1, threadsPerVector>>>(d_V1, d_V2, d_V3);

    // Copy results back from Device to Host
    cudaMemcpy(h_C, d_C, 9 * sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(h_V3, d_V3, 3 * sizeof(int), cudaMemcpyDeviceToHost);

    // Print the results
    cout << "Matrix Multiplication Result:\n";
    printMatrix((int*)h_C);
    cout << "\nVector Addition Result:\n";
    printVector((int*)h_V3);

    // Free device memory
    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
    cudaFree(d_V1);
    cudaFree(d_V2);
    cudaFree(d_V3);

    return 0;
}

OUTPUT:

Matrix Multiplication Result:
30 24 18
84 69 54
138 114 90

Vector Addition Result:
5 7 9
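
The CUDA program can be compiled and run with NVIDIA's nvcc toolchain; assuming the listing is saved as exp4.cu (the filename is illustrative):

nvcc exp4.cu -o exp4
./exp4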
