Ex7 Code

This document contains source code for a parallel matrix multiplication program using MPI. It includes functions for setting up a grid of processes, reading matrices by scattering portions to each process, performing local matrix multiplication on block portions, and writing output. The main function handles initializing MPI, setting up the grid, reading the input matrices A and B, performing the parallel Fox algorithm for matrix multiplication, and writing the result matrix C to a file.
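
For reference, the operation that the parallel code below distributes over a q x q process grid is the ordinary triple-loop matrix product. A minimal serial sketch is shown here for comparison (illustrative only and not part of the submitted source; the function name serial_matmul and the row-major layout are assumptions):

/* C = A * B, where A is ma x na, B is na x nb, and all matrices are stored row-major */
void serial_matmul(const float *A, const float *B, float *C, int ma, int na, int nb)
{
    int i, j, k;

    for (i = 0; i < ma; i++)
        for (j = 0; j < nb; j++)
        {
            C[i * nb + j] = 0.0f;
            for (k = 0; k < na; k++)
                C[i * nb + j] += A[i * na + k] * B[k * nb + j];
        }
}

In the program below, each process stores one (ma/q) x (na/q) block of A and one (mb/q) x (nb/q) block of B. Over q steps it broadcasts a block of A along its process row (row_broadcast), multiplies it into its current block of B with the same triple loop (local_dot), cyclically shifts the blocks of B along the process column (column_circular_shift), and accumulates the partial products into its block of C.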


File: /home/kai/uni-wuppertal/ws16_17/labcourses/homework7/ex7_1.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "mpi.h"

/* information about the q x q process grid */
typedef struct
{
    int p;               /* total number of processes        */
    MPI_Comm comm;       /* communicator for the whole grid  */
    MPI_Comm row_comm;   /* communicator for my row          */
    MPI_Comm col_comm;   /* communicator for my column       */
    int q;               /* order of the grid, q = sqrt(p)   */
    int my_row;          /* my row coordinate in the grid    */
    int my_col;          /* my column coordinate in the grid */
    int my_rank;         /* my rank in the grid communicator */
} GRID_INFO_T;

int main(int argc, char* argv[])
{
    int rank;
    float *block_A;
    float *block_B;
    float *mat_C;
    int ma, na;
    int mb, nb;
    int i;
    GRID_INFO_T grid;
    FILE *fp;
    double start1, start2, start3, start4;
    double finish1, finish2, finish3, finish4;

    void Setup_grid(GRID_INFO_T* grid);
    void Read_matrix(char* prompt, float block[], int m, int n, GRID_INFO_T* grid);
    float *parallel_Fox(float block_A[], float block_B[], int ma, int nb, int na,
                        int mb, GRID_INFO_T* grid);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    Setup_grid(&grid);

    /* read dimension of matrix A */
    if (rank == 0)
    {
        //printf("Enter the dimension m, n of the m x n matrix A:\n");
        scanf("%d %d", &ma, &na);
    }

    /* broadcast the dimension of matrix A */
    MPI_Bcast(&ma, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&na, 1, MPI_INT, 0, MPI_COMM_WORLD);

    block_A = (float*)calloc(na * ma / grid.p, sizeof(float));

    /* read matrix A */
    start1 = MPI_Wtime();
    Read_matrix("The matrix A", block_A, ma, na, &grid);
    finish1 = MPI_Wtime();

    /* read dimension of matrix B */
    if (grid.my_rank == 0)
    {
        //printf("Enter the dimension m, n of the m x n matrix B:\n");
        scanf("%d %d", &mb, &nb);
    }

    /* broadcast the dimension of matrix B */
    MPI_Bcast(&mb, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&nb, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (na != mb)
    {
        printf("Matrix dimensions do not match!\n");
        MPI_Finalize();
        exit(1);
    }

    block_B = (float*)calloc(nb * mb / grid.p, sizeof(float));

    /* read matrix B */
    start2 = MPI_Wtime();
    Read_matrix("The matrix B", block_B, mb, nb, &grid);
    finish2 = MPI_Wtime();

    /* Fox algorithm */
    start3 = MPI_Wtime();
    mat_C = parallel_Fox(block_A, block_B, ma, nb, na, mb, &grid);
    finish3 = MPI_Wtime();

    /* save result in a file */
    if (grid.my_rank == 0)
    {
        fp = fopen("C.txt", "a"); // create C.txt

        start4 = MPI_Wtime();
        fprintf(fp, "%d\n", ma);
        fprintf(fp, "%d\n", nb);

        for (i = 0; i < ma * nb; i++)
        {
            fprintf(fp, "%f\n", mat_C[i]);
        }
        finish4 = MPI_Wtime();

        fclose(fp);

        printf("Number of processes: %d\n", grid.p);
        printf("Time elapsed with I/O: %e\n", finish1 + finish2 + finish3 +
               finish4 - start1 - start2 - start3 - start4);
        printf("Time elapsed without I/O: %e\n", finish3 - start3);
    }

    free(block_A);
    free(block_B);
    free(mat_C);

    MPI_Finalize();
    return 0;
}

/* read a matrix from stdin and distribute its blocks over the process grid */
void Read_matrix(char* prompt, float block[], int m, int n, GRID_INFO_T* grid)
{
    float *temp;
    int m_bar;
    int n_bar;
    int i, j, k;
    int l;
    int dest;
    MPI_Status status;

    m_bar = m / grid->q;
    n_bar = n / grid->q;

    /* temp holds the blocks for one row of processes at a time */
    temp = (float*)malloc(m * n / grid->q * sizeof(float));

    /* this version using MPI_Scatter is shorter, but it needs to allocate the
       entire matrix on process 0, which is memory-costly */
    /*
    if (my_rank == 0)
    {
        for (i = 0; i < q; i++)           // row of processes
            for (j = 0; j < m; j++)       // number of reads for the part of the matrix for each row of proc.
                for (k = 0; k < n_bar; k++)
                {
                    scanf("%f", &temp[m * n / p * (j % q) + j / q * n_bar + k
                                      + i * m_bar * n_bar * q]);   // rearrange the index
                    // the size of temp equals the size of the entire matrix
                }
    }
    MPI_Scatter(temp, m * n / p, MPI_FLOAT, block, m * n / p, MPI_FLOAT, 0,
                MPI_COMM_WORLD);
    */

    /* this version reads 1/q of the entries at a time and then sends them to the
       corresponding row of processes */
    /* it is longer, but needs less memory */
    if (grid->my_rank == 0)
    {
        for (i = 0; i < grid->q; i++)     // row of processes
        {
            for (j = 0; j < m; j++)       // number of reads for the part of the matrix for each row of proc.
            {
                for (k = 0; k < n_bar; k++)
                {
                    scanf("%f", &temp[m_bar * n_bar * (j % grid->q)
                                      + j / grid->q * n_bar + k]); // rearrange the index
                }
            }

            if (i == 0)
            {
                memcpy(block, temp, m_bar * n_bar * sizeof(float)); // keep for proc. 0 itself

                for (l = 1; l < grid->q; l++)   // send the rest to the remaining first-row processes
                {
                    dest = l;
                    MPI_Send(temp + l * n_bar * m_bar, n_bar * m_bar, MPI_FLOAT, dest, 0,
                             grid->comm);
                }
            }
            else
            {
                for (l = 0; l < grid->q; l++)   // send to the other processes row-wise
                {
                    dest = l + i * grid->q;
                    MPI_Send(temp + l * n_bar * m_bar, n_bar * m_bar, MPI_FLOAT, dest, 0,
                             grid->comm);
                }
            }
        }
    }
    else
    {
        MPI_Recv(block, n_bar * m_bar, MPI_FLOAT, 0, 0, grid->comm, &status);
    }

    free(temp);
}

/* local dot operation on the blocks */
float *local_dot(float block_A[], float block_B[], int ma, int nb, int na,
                 GRID_INFO_T* grid)
{
    int j, k, h;
    int ma_bar, nb_bar, na_bar;
    float *res;

    res = (float*)calloc(ma * nb / grid->p, sizeof(float));
    ma_bar = ma / grid->q;
    nb_bar = nb / grid->q;
    na_bar = na / grid->q;

    for (j = 0; j < ma_bar; j++)          // number of rows in block A
    {
        for (k = 0; k < nb_bar; k++)      // number of columns in block B
        {
            for (h = 0; h < na_bar; h++)
            {
                res[j * nb_bar + k] = res[j * nb_bar + k]
                                      + block_A[h + j * na_bar] * block_B[k + h * nb_bar];
            }
        }
    }

    return res;
}

/* circular shift in the column communicators */
void column_circular_shift(float block_B[], int mb, int nb, GRID_INFO_T* grid)
{
    int dest;
    int source;
    int tag = 0;
    MPI_Status status;

    source = (grid->q + grid->my_row + 1) % grid->q;
    dest = (grid->q + grid->my_row - 1) % grid->q;

    MPI_Sendrecv_replace(block_B, mb * nb / grid->p, MPI_FLOAT, dest, tag, source,
                         tag, grid->col_comm, &status);
}

/* broadcast in the row communicator */
void row_broadcast(float block_A[], float new_block[], int na, int ma, int step,
                   GRID_INFO_T* grid)
{
    int root;
    int count;

    count = ma * na / grid->p;

    /* determine which block should be broadcast in step (0 ... grid->q - 1) in each row */
    if (grid->my_rank == grid->my_row * grid->q + (grid->my_row + step) % grid->q)
    {
        memcpy(new_block, block_A, count * sizeof(float));
    }

    root = (grid->my_row + step) % grid->q;

    MPI_Bcast(new_block, count, MPI_FLOAT, root, grid->row_comm);
}

/* parallel Fox algorithm */
float *parallel_Fox(float block_A[],
                    float block_B[],
                    int ma,
                    int nb,
                    int na,
                    int mb,
                    GRID_INFO_T* grid)
{
    int i, j, max_step;
    float *res;
    float *new_block;
    float *pieces_res;
    float *mat_C;

    float *local_dot(float block_A[], float block_B[], int ma, int nb, int na,
                     GRID_INFO_T* grid);
    void column_circular_shift(float block_B[], int mb, int nb, GRID_INFO_T* grid);
    void row_broadcast(float block_A[], float new_block[], int na, int ma, int step,
                       GRID_INFO_T* grid);
    void rearrange_result(float *block_C, float *res, int ma, int nb,
                          GRID_INFO_T* grid);

    max_step = grid->q;
    res = (float*)calloc(ma * nb / grid->p, sizeof(float));
    new_block = (float*)calloc(na * ma / grid->p, sizeof(float));
    mat_C = (float*)malloc(ma * nb * sizeof(float));

    for (i = 0; i < max_step; i++)
    {
        row_broadcast(block_A, new_block, na, ma, i, grid);
        pieces_res = local_dot(new_block, block_B, ma, nb, na, grid);
        column_circular_shift(block_B, mb, nb, grid);

        for (j = 0; j < ma * nb / grid->p; j++)
        {
            res[j] = res[j] + pieces_res[j]; // update local result
        }

        free(pieces_res); // local_dot allocates a fresh buffer in every step
    }

    rearrange_result(res, mat_C, ma, nb, grid); // arrange entries in normal order

    free(res);
    free(new_block);

    return mat_C;
}

void Setup_grid(GRID_INFO_T* grid /* out */)
{
    int old_rank;
    int dimensions[2];
    int wrap_around[2];
    int coordinates[2];
    int free_coords[2];

    /* Set up Global Grid Information */
    MPI_Comm_size(MPI_COMM_WORLD, &(grid->p));
    MPI_Comm_rank(MPI_COMM_WORLD, &old_rank);

    /* We assume p is a perfect square */
    grid->q = (int) sqrt((double) grid->p);
    dimensions[0] = dimensions[1] = grid->q;

    /* We want a circular shift in the second dimension. */
    /* Don't care about the first. */
    wrap_around[0] = wrap_around[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions,
                    wrap_around, 1, &(grid->comm));
    MPI_Comm_rank(grid->comm, &(grid->my_rank));
    MPI_Cart_coords(grid->comm, grid->my_rank, 2,
                    coordinates);
    grid->my_row = coordinates[0];
    grid->my_col = coordinates[1];

    /* Set up row communicators */
    free_coords[0] = 0;
    free_coords[1] = 1;
    MPI_Cart_sub(grid->comm, free_coords,
                 &(grid->row_comm));

    /* Set up column communicators */
    free_coords[0] = 1;
    free_coords[1] = 0;
    MPI_Cart_sub(grid->comm, free_coords,
                 &(grid->col_comm));
} /* Setup_grid */

/* gather the local blocks (block_C) into res on process 0, in normal row-major order */
void rearrange_result(float *block_C, float *res, int ma, int nb, GRID_INFO_T* grid)
{
    float *buff;
    int i;
    int nb_bar, ma_bar;

    ma_bar = ma / grid->q;
    nb_bar = nb / grid->q;
    buff = (float*)malloc(ma_bar * nb * sizeof(float));

    for (i = 0; i < ma_bar; i++)  // rearrange the entries in each row of grid processes
    {
        MPI_Gather(block_C + i * nb_bar, nb_bar, MPI_FLOAT, buff + i * nb, nb_bar,
                   MPI_FLOAT, 0, grid->row_comm);
    }

    // gather the ordered entries from the first column of the grid processes into proc. 0
    MPI_Gather(buff, ma_bar * nb, MPI_FLOAT, res, ma_bar * nb, MPI_FLOAT, 0,
               grid->col_comm);

    free(buff);
}
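
The result file C.txt written by rank 0 contains the two dimensions of C followed by its ma * nb entries, one value per line. A minimal sketch of reading one such result back for checking (illustrative only; the function name read_result is an assumption, and since the program opens C.txt in append mode the file may hold several results one after another):

#include <stdio.h>
#include <stdlib.h>

/* read one result matrix from C.txt as written by the program above */
float *read_result(const char *path, int *rows, int *cols)
{
    FILE *fp = fopen(path, "r");
    float *C;
    int i;

    if (fp == NULL)
        return NULL;
    if (fscanf(fp, "%d %d", rows, cols) != 2)
    {
        fclose(fp);
        return NULL;
    }

    C = (float*)malloc((*rows) * (*cols) * sizeof(float));
    for (i = 0; i < (*rows) * (*cols); i++)
        fscanf(fp, "%f", &C[i]);

    fclose(fp);
    return C;
}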
