Assignment 04 (2)
Task_1: Parallel multiplication of two n x n matrices, implemented four ways.
1. Using MPI (Message Passing Interface) for distributed memory systems.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>

/* Fill an array with random values in [0, 1). */
void fill_random(double *a, int count) {
    for (int i = 0; i < count; i++)
        a[i] = (double)rand() / RAND_MAX;
}

/* Print the top-left corner (at most 4 x 4) of an n x n matrix. */
void print_submatrix(double *m, int n) {
    int lim = n < 4 ? n : 4;
    for (int i = 0; i < lim; i++) {
        for (int j = 0; j < lim; j++)
            printf("%8.3f ", m[i*n + j]);
        printf("\n");
    }
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <num_procs>\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);
    int num_procs = atoi(argv[2]);
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != num_procs) {        /* launched process count must match argv[2] */
        MPI_Finalize();
        return 1;
    }
    if (n % size != 0) {            /* rows must divide evenly among the ranks */
        MPI_Finalize();
        return 1;
    }
    int rows_per_proc = n / size;
    double *A = NULL, *B = NULL, *C = NULL;
    B = malloc(n * n * sizeof(double));   /* every rank needs the full B */
    double *local_A = malloc(rows_per_proc * n * sizeof(double));
    double *local_C = malloc(rows_per_proc * n * sizeof(double));
    if (rank == 0) {
        A = malloc(n * n * sizeof(double));
        C = malloc(n * n * sizeof(double));
        srand(time(NULL));
        fill_random(A, n*n);
        fill_random(B, n*n);
    }
    /* Distribute the inputs: all of B to every rank, one block of rows of A to each. */
    MPI_Bcast(B, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(A, n*n/size, MPI_DOUBLE, local_A, n*n/size, MPI_DOUBLE, 0,
                MPI_COMM_WORLD);
    /* Each rank multiplies its block of rows of A by B. */
    for (int i = 0; i < rows_per_proc; i++)
        for (int j = 0; j < n; j++) {
            local_C[i*n + j] = 0;
            for (int k = 0; k < n; k++)
                local_C[i*n + j] += local_A[i*n + k] * B[k*n + j];
        }
    MPI_Gather(local_C, n*n/size, MPI_DOUBLE, C, n*n/size, MPI_DOUBLE, 0,
               MPI_COMM_WORLD);
    if (rank == 0) {
        print_submatrix(C, n);
        free(A); free(C);
    }
    free(B); free(local_A); free(local_C);
    MPI_Finalize();
    return 0;
}
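A typical build-and-run sequence for this version (the file name matmul_mpi.c is illustrative, not from the assignment):

mpicc -O2 -o matmul_mpi matmul_mpi.c
mpirun -np 4 ./matmul_mpi 512 4

Note that the second argument must equal the -np process count, and the matrix size must be divisible by it.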
2. Using Pthreads (POSIX threads) for shared memory systems.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>

#define MAX_THREADS 64

int n, num_threads;
double *A, *B, *C;

typedef struct {
    int start_row;
    int end_row;
} ThreadData;

/* fill_random() is the same helper as in the MPI version above. */

/* Print the top-left corner (at most 4 x 4); n is a global here. */
void print_submatrix(double *m) {
    int lim = n < 4 ? n : 4;
    for (int i = 0; i < lim; i++) {
        for (int j = 0; j < lim; j++) printf("%8.3f ", m[i*n + j]);
        printf("\n");
    }
}

/* Each thread computes the rows [start_row, end_row) of C. */
void *multiply_rows(void *arg) {
    ThreadData *d = (ThreadData *)arg;
    for (int i = d->start_row; i < d->end_row; i++)
        for (int j = 0; j < n; j++) {
            C[i*n + j] = 0;
            for (int k = 0; k < n; k++)
                C[i*n + j] += A[i*n + k] * B[k*n + j];
        }
    pthread_exit(NULL);
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <num_threads>\n", argv[0]);
        return 1;
    }
    n = atoi(argv[1]);
    num_threads = atoi(argv[2]);
    if (num_threads > MAX_THREADS) num_threads = MAX_THREADS;
    A = malloc(n * n * sizeof(double));
    B = malloc(n * n * sizeof(double));
    C = malloc(n * n * sizeof(double));
    srand(time(NULL));
    fill_random(A, n*n);
    fill_random(B, n*n);
    pthread_t threads[MAX_THREADS];
    ThreadData thread_data[MAX_THREADS];
    int rows_per_thread = n / num_threads;
    for (int i = 0; i < num_threads; i++) {
        thread_data[i].start_row = i * rows_per_thread;
        /* the last thread also takes any leftover rows */
        thread_data[i].end_row = (i == num_threads - 1) ? n : (i + 1) * rows_per_thread;
        pthread_create(&threads[i], NULL, multiply_rows, &thread_data[i]);
    }
    for (int i = 0; i < num_threads; i++)
        pthread_join(threads[i], NULL);
    print_submatrix(C);
    free(A); free(B); free(C);
    return 0;
}
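A build-and-run sketch for the Pthreads version, assuming it is saved as matmul_pthreads.c (an illustrative name):

gcc -O2 -pthread -o matmul_pthreads matmul_pthreads.c
./matmul_pthreads 512 4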
3. Using OpenMP for shared memory systems.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>

/* fill_random() and print_submatrix() are the same helpers as in the MPI version above. */

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <num_threads>\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);
    int num_threads = atoi(argv[2]);
    omp_set_num_threads(num_threads);
    double *A = malloc(n * n * sizeof(double));
    double *B = malloc(n * n * sizeof(double));
    double *C = malloc(n * n * sizeof(double));
    srand(time(NULL));
    fill_random(A, n*n);
    fill_random(B, n*n);
    /* Parallelize over rows of C; the iterations are independent. */
    #pragma omp parallel for
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++) {
            C[i*n + j] = 0;
            for (int k = 0; k < n; k++)
                C[i*n + j] += A[i*n + k] * B[k*n + j];
        }
    print_submatrix(C, n);
    free(A); free(B); free(C);
    return 0;
}
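Assuming the file is named matmul_omp.c (illustrative), it is built with GCC's OpenMP flag:

gcc -O2 -fopenmp -o matmul_omp matmul_omp.c
./matmul_omp 512 4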
4. Using CUDA for GPU acceleration.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda_runtime.h>

#define BLOCK_SIZE 16

/* fill_random() and print_submatrix() are the same host helpers as in the MPI version above. */

/* One thread computes one element of C. */
__global__ void matmul_kernel(const double *A, const double *B, double *C, int n) {
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    if (row < n && col < n) {
        double sum = 0;
        for (int k = 0; k < n; k++)
            sum += A[row*n + k] * B[k*n + col];
        C[row*n + col] = sum;
    }
}

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <num_threads>\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);   /* argv[2] is kept for a uniform interface; the block size is fixed by BLOCK_SIZE */
    double *A = (double *)malloc(n*n*sizeof(double));
    double *B = (double *)malloc(n*n*sizeof(double));
    double *C = (double *)malloc(n*n*sizeof(double));
    srand(time(NULL));
    fill_random(A, n*n);
    fill_random(B, n*n);
    double *d_A, *d_B, *d_C;
    cudaMalloc(&d_A, n*n*sizeof(double));
    cudaMalloc(&d_B, n*n*sizeof(double));
    cudaMalloc(&d_C, n*n*sizeof(double));
    cudaMemcpy(d_A, A, n*n*sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, B, n*n*sizeof(double), cudaMemcpyHostToDevice);
    /* A 2D grid of BLOCK_SIZE x BLOCK_SIZE blocks covering the n x n output. */
    dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    dim3 grid((n + BLOCK_SIZE - 1) / BLOCK_SIZE,
              (n + BLOCK_SIZE - 1) / BLOCK_SIZE);
    matmul_kernel<<<grid, block>>>(d_A, d_B, d_C, n);
    cudaMemcpy(C, d_C, n*n*sizeof(double), cudaMemcpyDeviceToHost);
    print_submatrix(C, n);
    cudaFree(d_A); cudaFree(d_B); cudaFree(d_C);
    free(A); free(B); free(C);
    return 0;
}
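Assuming the file is named matmul_cuda.cu (illustrative), it is compiled with NVIDIA's nvcc; the second command-line argument is accepted but unused in this reconstruction, since the thread-block size is fixed at compile time:

nvcc -O2 -o matmul_cuda matmul_cuda.cu
./matmul_cuda 512 16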
Task_2: Parallel summation of an alternating series.
1. Using MPI (Message Passing Interface) for distributed memory systems.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <num_terms> <num_procs>\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);
    int num_procs = atoi(argv[2]);
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != num_procs) {        /* launched process count must match argv[2] */
        MPI_Finalize();
        return 1;
    }
    /* The fragment fixes only the alternating sign; the Leibniz series
       pi/4 = 1 - 1/3 + 1/5 - ... is ASSUMED for the term itself.
       Each rank sums every size-th term, so the sign per global index i is preserved. */
    double local_sum = 0;
    for (int i = rank; i < n; i += size) {
        double term = 1.0 / (2.0 * i + 1.0);
        if (i % 2 == 1) term = -term;
        local_sum += term;
    }
    double global_sum;
    MPI_Reduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Sum of %d terms: %.15f (pi ~ %.15f)\n", n, global_sum, 4.0 * global_sum);
    }
    MPI_Finalize();
    return 0;
}
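Assuming the source is saved as series_mpi.c (an illustrative name), a run with 4 processes and one million terms looks like:

mpicc -O2 -o series_mpi series_mpi.c
mpirun -np 4 ./series_mpi 1000000 4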
2. Using OpenMP for shared memory systems.
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <num_terms> <num_threads>\n", argv[0]);
        return 1;
    }
    int n = atoi(argv[1]);
    int num_threads = atoi(argv[2]);
    omp_set_num_threads(num_threads);
    double sum = 0;
    /* Same alternating series as the MPI version (Leibniz term ASSUMED);
       reduction(+:sum) makes the shared accumulation thread-safe. */
    #pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < n; i++) {
        double term = 1.0 / (2.0 * i + 1.0);
        if (i % 2 == 1) term = -term;
        sum += term;
    }
    printf("Sum of %d terms: %.15f (pi ~ %.15f)\n", n, sum, 4.0 * sum);
    return 0;
}
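Built and run the same way, assuming the illustrative name series_omp.c:

gcc -O2 -fopenmp -o series_omp series_omp.c
./series_omp 1000000 4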