Ex7 Code

This document contains source code for a parallel matrix multiplication program using MPI. It includes functions for setting up a grid of processes, reading matrices by scattering portions to each process, performing local matrix multiplication on block portions, and writing output. The main function handles initializing MPI, setting up the grid, reading the input matrices A and B, performing the parallel Fox algorithm for matrix multiplication, and writing the result matrix C to a file.
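
For reference, the operation that the parallel code below distributes over a q x q process grid is the ordinary triple-loop matrix product. A minimal serial sketch is shown here for comparison (illustrative only and not part of the submitted source; the function name serial_matmul and the row-major layout are assumptions):

/* C = A * B, where A is ma x na, B is na x nb, and all matrices are stored row-major */
void serial_matmul(const float *A, const float *B, float *C, int ma, int na, int nb)
{
    int i, j, k;

    for (i = 0; i < ma; i++)
        for (j = 0; j < nb; j++)
        {
            C[i * nb + j] = 0.0f;
            for (k = 0; k < na; k++)
                C[i * nb + j] += A[i * na + k] * B[k * nb + j];
        }
}

In the program below, each process stores one (ma/q) x (na/q) block of A and one (mb/q) x (nb/q) block of B. Over q steps it broadcasts a block of A along its process row (row_broadcast), multiplies it into its current block of B with the same triple loop (local_dot), cyclically shifts the blocks of B along the process column (column_circular_shift), and accumulates the partial products into its block of C.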


File: /home/kai/uni-wuppertal/ws16_17/labcourses/homework7/ex7_1.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "mpi.h"

/* information about the q x q process grid */
typedef struct
{
    int p;               /* total number of processes        */
    MPI_Comm comm;       /* communicator for the whole grid  */
    MPI_Comm row_comm;   /* communicator for my row          */
    MPI_Comm col_comm;   /* communicator for my column       */
    int q;               /* order of the grid, q = sqrt(p)   */
    int my_row;          /* my row coordinate in the grid    */
    int my_col;          /* my column coordinate in the grid */
    int my_rank;         /* my rank in the grid communicator */
} GRID_INFO_T;

int main(int argc, char* argv[])
{
    int rank;
    float *block_A;
    float *block_B;
    float *mat_C;
    int ma, na;
    int mb, nb;
    int i;
    GRID_INFO_T grid;
    FILE *fp;
    double start1, start2, start3, start4;
    double finish1, finish2, finish3, finish4;

    void Setup_grid(GRID_INFO_T* grid);
    void Read_matrix(char* prompt, float block[], int m, int n, GRID_INFO_T* grid);
    float *parallel_Fox(float block_A[], float block_B[], int ma, int nb, int na,
                        int mb, GRID_INFO_T* grid);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    Setup_grid(&grid);

    /* read dimension of matrix A */
    if (rank == 0)
    {
        //printf("Enter the dimension m, n of the m x n matrix A:\n");
        scanf("%d %d", &ma, &na);
    }

    /* broadcast the dimension of matrix A */
    MPI_Bcast(&ma, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&na, 1, MPI_INT, 0, MPI_COMM_WORLD);

    block_A = (float*)calloc(na * ma / grid.p, sizeof(float));

    /* read matrix A */
    start1 = MPI_Wtime();
    Read_matrix("The matrix A", block_A, ma, na, &grid);
    finish1 = MPI_Wtime();

    /* read dimension of matrix B */
    if (grid.my_rank == 0)
    {
        //printf("Enter the dimension m, n of the m x n matrix B:\n");
        scanf("%d %d", &mb, &nb);
    }

    /* broadcast the dimension of matrix B */
    MPI_Bcast(&mb, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&nb, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (na != mb)
    {
        printf("Matrix dimensions do not match!\n");
        MPI_Finalize();
        exit(1);
    }

    block_B = (float*)calloc(nb * mb / grid.p, sizeof(float));

    /* read matrix B */
    start2 = MPI_Wtime();
    Read_matrix("The matrix B", block_B, mb, nb, &grid);
    finish2 = MPI_Wtime();

    /* Fox algorithm */
    start3 = MPI_Wtime();
    mat_C = parallel_Fox(block_A, block_B, ma, nb, na, mb, &grid);
    finish3 = MPI_Wtime();

    /* save result in a file */
    if (grid.my_rank == 0)
    {
        fp = fopen("C.txt", "a"); // create C.txt

        start4 = MPI_Wtime();
        fprintf(fp, "%d\n", ma);
        fprintf(fp, "%d\n", nb);

        for (i = 0; i < ma * nb; i++)
        {
            fprintf(fp, "%f\n", mat_C[i]);
        }
        finish4 = MPI_Wtime();

        fclose(fp);

        printf("Number of processes: %d\n", grid.p);
        printf("Time elapsed with I/O: %e\n", finish1 + finish2 + finish3 +
               finish4 - start1 - start2 - start3 - start4);
        printf("Time elapsed without I/O: %e\n", finish3 - start3);
    }

    free(block_A);
    free(block_B);
    free(mat_C);

    MPI_Finalize();
    return 0;
}

/* read a matrix from stdin and distribute its blocks over the process grid */
void Read_matrix(char* prompt, float block[], int m, int n, GRID_INFO_T* grid)
{
    float *temp;
    int m_bar;
    int n_bar;
    int i, j, k;
    int l;
    int dest;
    MPI_Status status;

    m_bar = m / grid->q;
    n_bar = n / grid->q;

    /* temp holds the blocks for one row of processes at a time */
    temp = (float*)malloc(m * n / grid->q * sizeof(float));

    /* this version using MPI_Scatter is shorter, but it needs to allocate the
       entire matrix on process 0, which is memory-costly */
    /*
    if (my_rank == 0)
    {
        for (i = 0; i < q; i++)           // row of processes
            for (j = 0; j < m; j++)       // number of reads for the part of the matrix for each row of proc.
                for (k = 0; k < n_bar; k++)
                {
                    scanf("%f", &temp[m * n / p * (j % q) + j / q * n_bar + k
                                      + i * m_bar * n_bar * q]);   // rearrange the index
                    // the size of temp equals the size of the entire matrix
                }
    }
    MPI_Scatter(temp, m * n / p, MPI_FLOAT, block, m * n / p, MPI_FLOAT, 0,
                MPI_COMM_WORLD);
    */

    /* this version reads 1/q of the entries at a time and then sends them to the
       corresponding row of processes */
    /* it is longer, but needs less memory */
    if (grid->my_rank == 0)
    {
        for (i = 0; i < grid->q; i++)     // row of processes
        {
            for (j = 0; j < m; j++)       // number of reads for the part of the matrix for each row of proc.
            {
                for (k = 0; k < n_bar; k++)
                {
                    scanf("%f", &temp[m_bar * n_bar * (j % grid->q)
                                      + j / grid->q * n_bar + k]); // rearrange the index
                }
            }

            if (i == 0)
            {
                memcpy(block, temp, m_bar * n_bar * sizeof(float)); // keep for proc. 0 itself

                for (l = 1; l < grid->q; l++)   // send the rest to the remaining first-row processes
                {
                    dest = l;
                    MPI_Send(temp + l * n_bar * m_bar, n_bar * m_bar, MPI_FLOAT, dest, 0,
                             grid->comm);
                }
            }
            else
            {
                for (l = 0; l < grid->q; l++)   // send to the other processes row-wise
                {
                    dest = l + i * grid->q;
                    MPI_Send(temp + l * n_bar * m_bar, n_bar * m_bar, MPI_FLOAT, dest, 0,
                             grid->comm);
                }
            }
        }
    }
    else
    {
        MPI_Recv(block, n_bar * m_bar, MPI_FLOAT, 0, 0, grid->comm, &status);
    }

    free(temp);
}

/* local dot operation on the blocks */
float *local_dot(float block_A[], float block_B[], int ma, int nb, int na,
                 GRID_INFO_T* grid)
{
    int j, k, h;
    int ma_bar, nb_bar, na_bar;
    float *res;

    res = (float*)calloc(ma * nb / grid->p, sizeof(float));
    ma_bar = ma / grid->q;
    nb_bar = nb / grid->q;
    na_bar = na / grid->q;

    for (j = 0; j < ma_bar; j++)          // number of rows in block A
    {
        for (k = 0; k < nb_bar; k++)      // number of columns in block B
        {
            for (h = 0; h < na_bar; h++)
            {
                res[j * nb_bar + k] = res[j * nb_bar + k]
                                      + block_A[h + j * na_bar] * block_B[k + h * nb_bar];
            }
        }
    }

    return res;
}

/* circular shift in the column communicators */
void column_circular_shift(float block_B[], int mb, int nb, GRID_INFO_T* grid)
{
    int dest;
    int source;
    int tag = 0;
    MPI_Status status;

    source = (grid->q + grid->my_row + 1) % grid->q;
    dest = (grid->q + grid->my_row - 1) % grid->q;

    MPI_Sendrecv_replace(block_B, mb * nb / grid->p, MPI_FLOAT, dest, tag, source,
                         tag, grid->col_comm, &status);
}

/* broadcast in the row communicator */
void row_broadcast(float block_A[], float new_block[], int na, int ma, int step,
                   GRID_INFO_T* grid)
{
    int root;
    int count;

    count = ma * na / grid->p;

    /* determine which block should be broadcast in step (0 ... grid->q - 1) in each row */
    if (grid->my_rank == grid->my_row * grid->q + (grid->my_row + step) % grid->q)
    {
        memcpy(new_block, block_A, count * sizeof(float));
    }

    root = (grid->my_row + step) % grid->q;

    MPI_Bcast(new_block, count, MPI_FLOAT, root, grid->row_comm);
}

/* parallel Fox algorithm */
float *parallel_Fox(float block_A[],
                    float block_B[],
                    int ma,
                    int nb,
                    int na,
                    int mb,
                    GRID_INFO_T* grid)
{
    int i, j, max_step;
    float *res;
    float *new_block;
    float *pieces_res;
    float *mat_C;

    float *local_dot(float block_A[], float block_B[], int ma, int nb, int na,
                     GRID_INFO_T* grid);
    void column_circular_shift(float block_B[], int mb, int nb, GRID_INFO_T* grid);
    void row_broadcast(float block_A[], float new_block[], int na, int ma, int step,
                       GRID_INFO_T* grid);
    void rearrange_result(float *block_C, float *res, int ma, int nb,
                          GRID_INFO_T* grid);

    max_step = grid->q;
    res = (float*)calloc(ma * nb / grid->p, sizeof(float));
    new_block = (float*)calloc(na * ma / grid->p, sizeof(float));
    mat_C = (float*)malloc(ma * nb * sizeof(float));

    for (i = 0; i < max_step; i++)
    {
        row_broadcast(block_A, new_block, na, ma, i, grid);
        pieces_res = local_dot(new_block, block_B, ma, nb, na, grid);
        column_circular_shift(block_B, mb, nb, grid);

        for (j = 0; j < ma * nb / grid->p; j++)
        {
            res[j] = res[j] + pieces_res[j]; // update local result
        }

        free(pieces_res); // local_dot allocates a fresh buffer in every step
    }

    rearrange_result(res, mat_C, ma, nb, grid); // arrange entries in normal order

    free(res);
    free(new_block);

    return mat_C;
}

void Setup_grid(GRID_INFO_T* grid /* out */)
{
    int old_rank;
    int dimensions[2];
    int wrap_around[2];
    int coordinates[2];
    int free_coords[2];

    /* Set up Global Grid Information */
    MPI_Comm_size(MPI_COMM_WORLD, &(grid->p));
    MPI_Comm_rank(MPI_COMM_WORLD, &old_rank);

    /* We assume p is a perfect square */
    grid->q = (int) sqrt((double) grid->p);
    dimensions[0] = dimensions[1] = grid->q;

    /* We want a circular shift in the second dimension. */
    /* Don't care about the first. */
    wrap_around[0] = wrap_around[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions,
                    wrap_around, 1, &(grid->comm));
    MPI_Comm_rank(grid->comm, &(grid->my_rank));
    MPI_Cart_coords(grid->comm, grid->my_rank, 2,
                    coordinates);
    grid->my_row = coordinates[0];
    grid->my_col = coordinates[1];

    /* Set up row communicators */
    free_coords[0] = 0;
    free_coords[1] = 1;
    MPI_Cart_sub(grid->comm, free_coords,
                 &(grid->row_comm));

    /* Set up column communicators */
    free_coords[0] = 1;
    free_coords[1] = 0;
    MPI_Cart_sub(grid->comm, free_coords,
                 &(grid->col_comm));
} /* Setup_grid */

/* gather the local blocks (block_C) into res on process 0, in normal row-major order */
void rearrange_result(float *block_C, float *res, int ma, int nb, GRID_INFO_T* grid)
{
    float *buff;
    int i;
    int nb_bar, ma_bar;

    ma_bar = ma / grid->q;
    nb_bar = nb / grid->q;
    buff = (float*)malloc(ma_bar * nb * sizeof(float));

    for (i = 0; i < ma_bar; i++)  // rearrange the entries in each row of grid processes
    {
        MPI_Gather(block_C + i * nb_bar, nb_bar, MPI_FLOAT, buff + i * nb, nb_bar,
                   MPI_FLOAT, 0, grid->row_comm);
    }

    // gather the ordered entries from the first column of the grid processes into proc. 0
    MPI_Gather(buff, ma_bar * nb, MPI_FLOAT, res, ma_bar * nb, MPI_FLOAT, 0,
               grid->col_comm);

    free(buff);
}
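
The result file C.txt written by rank 0 contains the two dimensions of C followed by its ma * nb entries, one value per line. A minimal sketch of reading one such result back for checking (illustrative only; the function name read_result is an assumption, and since the program opens C.txt in append mode the file may hold several results one after another):

#include <stdio.h>
#include <stdlib.h>

/* read one result matrix from C.txt as written by the program above */
float *read_result(const char *path, int *rows, int *cols)
{
    FILE *fp = fopen(path, "r");
    float *C;
    int i;

    if (fp == NULL)
        return NULL;
    if (fscanf(fp, "%d %d", rows, cols) != 2)
    {
        fclose(fp);
        return NULL;
    }

    C = (float*)malloc((*rows) * (*cols) * sizeof(float));
    for (i = 0; i < (*rows) * (*cols); i++)
        fscanf(fp, "%f", &C[i]);

    fclose(fp);
    return C;
}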
