Commit 05807ce4 authored by Björn Fischer's avatar Björn Fischer

add comments

parent e730db5d
......@@ -37,12 +37,15 @@ MPI_Comm Create_MPI_Cart_Cluster(MPI_Comm comm, int rank, int *pro_per_dim) {
MPI_Comm cart_comm;
int periods[] = {0,0}; // edges are not connected
// no reorder
if(MPI_Cart_create(comm, 2, pro_per_dim, periods, 0, &cart_comm)) {
fprintf(stderr, "Cannot create topology\n");
exit(1);
}
if(cart_comm == MPI_COMM_NULL) {
printf("process %d not in use. exiting...\n", rank);
// finalize is necessary, otherwise MPI crashes
MPI_Finalize();
exit(0);
}
......
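A minimal, self-contained sketch of the Cartesian-grid setup used above. The MPI_Dims_create call stands in for the project's Optimize_Cart_Cluster and is only an assumption; the MPI_COMM_NULL guard mirrors the check in the hunk, which finalizes and exits ranks that do not fit into the 2D grid.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int pro_per_dim[2] = {0, 0};
    MPI_Dims_create(size, 2, pro_per_dim);     // let MPI pick a balanced 2D split (assumption)

    int periods[] = {0, 0};                    // edges are not connected
    MPI_Comm cart_comm;
    if (MPI_Cart_create(MPI_COMM_WORLD, 2, pro_per_dim, periods, 0, &cart_comm)) {
        fprintf(stderr, "Cannot create topology\n");
        exit(1);
    }
    if (cart_comm == MPI_COMM_NULL) {          // rank not needed by the grid
        printf("process %d not in use. exiting...\n", rank);
        MPI_Finalize();                        // finalize before exiting, as above
        return 0;
    }

    int coords[2];
    MPI_Comm_rank(cart_comm, &rank);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    printf("rank %d -> (%d, %d)\n", rank, coords[0], coords[1]);

    MPI_Finalize();
    return 0;
}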
......@@ -37,6 +37,7 @@ void Init_Jacobi(int dim0_size, int dim1_size, int alpha, double *delta_t, doubl
}
void Init_Edges(int dim0_size, int dim1_size, int *matrix_size, int neighbor_dim0_left, int neighbor_dim0_right, int neighbor_dim1_left, int neighbor_dim1_right, double **partial_field, double **partial_field_clipboard, t_process_info pi) {
// determine whether this edge is an edge of the root field
if(neighbor_dim1_left == MPI_PROC_NULL) {
for(int i = pi.start_m; i <= pi.end_m; i++) {
partial_field[i - pi.start_m + 1][1] = (double)i / (dim0_size-1);
......@@ -79,7 +80,6 @@ int Jacobi_Iterate(int neighbor_dim0_left, int neighbor_dim0_right, int neighbor
j < pi.end_n - pi.start_n + ((neighbor_dim1_right == MPI_PROC_NULL) ? 1 : 2);
j++
) {
//printf("%d working on %d, %d\n", omp_get_thread_num(), i, j);
delta_a = alpha *
( ((*partial_field)[i+1][j] + (*partial_field)[i-1][j] - 2.0 * (*partial_field)[i][j]) / (hy_square)
+((*partial_field)[i][j-1] + (*partial_field)[i][j+1] - 2.0 * (*partial_field)[i][j]) / (hx_square) );
......@@ -90,6 +90,7 @@ int Jacobi_Iterate(int neighbor_dim0_left, int neighbor_dim0_right, int neighbor
maxdiff = delta_a;
}
}
// just switch pointers instead of copying the data
swap = *partial_field_clipboard;
*partial_field_clipboard = *partial_field;
*partial_field = swap;
......
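The delta_a stencil and the pointer swap above amount to a double-buffered Jacobi sweep: the new iterate is written into the clipboard field and the two pointers are exchanged afterwards instead of copying data. Below is a serial sketch of that pattern on a small fixed-size field; the explicit update next = cur + delta_t * delta_a is an assumption, since the hunk only shows the delta_a computation and the maxdiff check.

#include <stdio.h>

#define N 8

// One Jacobi sweep: read from *a, write to *b, then swap the two pointers.
static void jacobi_sweep(double (**a)[N], double (**b)[N],
                         double alpha, double delta_t,
                         double hx_square, double hy_square) {
    double (*cur)[N] = *a, (*next)[N] = *b;
    for (int i = 1; i < N - 1; i++) {
        for (int j = 1; j < N - 1; j++) {
            double delta_a = alpha *
                ( (cur[i+1][j] + cur[i-1][j] - 2.0 * cur[i][j]) / hy_square
                + (cur[i][j-1] + cur[i][j+1] - 2.0 * cur[i][j]) / hx_square );
            next[i][j] = cur[i][j] + delta_t * delta_a;   // assumed explicit update
        }
    }
    // just switch pointers instead of copying the data
    double (*swap)[N] = *b;
    *b = *a;
    *a = swap;
}

int main(void) {
    static double f1[N][N], f2[N][N];                     // zero-initialized
    double (*field)[N] = f1, (*clipboard)[N] = f2;
    for (int i = 0; i < N; i++)                           // left boundary held at 1.0
        f1[i][0] = f2[i][0] = 1.0;
    for (int it = 0; it < 100; it++)
        jacobi_sweep(&field, &clipboard, 1.0, 0.1, 1.0, 1.0);
    printf("center value: %f\n", field[N/2][N/2]);
    return 0;
}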
......@@ -20,7 +20,7 @@ int main(int argc, char **argv)
double **partial_field; // partial field where process works on
double **partial_field_clipboard; // copy of partial field
t_process_info pi;
t_process_info *infos;
t_process_info *infos; // all pi structs gathered by root
int pro_per_dim[2];
int cell_per_pro[2];
MPI_Comm cart_comm;
......@@ -35,6 +35,7 @@ int main(int argc, char **argv)
MPI_Init(&argc, &argv);
// fill variable from argv
Process_Args(argc, argv, &num_threads, &dim0_size, &dim1_size, &eps, &delta_t);
printf("%d threads per process\n", num_threads);
......@@ -58,13 +59,14 @@ int main(int argc, char **argv)
}
}
// optimize cart cluster
// optimize cart cluster (minimize edges)
Optimize_Cart_Cluster(dim0_size, dim1_size, MPI_COMM_WORLD, rank, pro_per_dim, cell_per_pro);
cart_comm = Create_MPI_Cart_Cluster(MPI_COMM_WORLD, rank, pro_per_dim);
pi = Calculate_Process_Info(cart_comm, rank, dim0_size, dim1_size, cell_per_pro);
// size of partial field + edges of all neighbors
matrix_size[0] = pi.end_m - pi.start_m + 3;
matrix_size[1] = pi.end_n - pi.start_n + 3;
......@@ -73,6 +75,7 @@ int main(int argc, char **argv)
exit(1);
}
// each process computes its own pi and sends it to root
infos = Gather_Process_Info(&pi, rank, cart_cluster_size, cart_comm);
if(rank == 0) {
......@@ -81,12 +84,16 @@ int main(int argc, char **argv)
}
}
// alloc two partial fields
Alloc_Partial_Field(matrix_size, &partial_field, &partial_field_clipboard);
// calculate neighbor ranks, alloc memory for communication
Init_Neighbor_Comm(cart_comm, sync_requests, matrix_size, &neighbor_dim0_left, &neighbor_dim0_right, &neighbor_dim1_left, &neighbor_dim1_right, &dim1_own_edge_values, &dim1_neighbor_egde_values);
// calculate jacobi parameter
Init_Jacobi(dim0_size, dim1_size, alpha, &delta_t, &hx, &hy, &hx_square, &hy_square);
// set boundary values on the edges (1 at the corner)
Init_Edges(dim0_size, dim1_size, matrix_size, neighbor_dim0_left, neighbor_dim0_right, neighbor_dim1_left, neighbor_dim1_right, partial_field, partial_field_clipboard, pi);
int *completions = malloc(sizeof(int) * cart_cluster_size);
......@@ -103,15 +110,10 @@ int main(int argc, char **argv)
while (1) { // iterate until break;
iterations++;
// do one iteration
int completion = Jacobi_Iterate(neighbor_dim0_left, neighbor_dim0_right, neighbor_dim1_left, neighbor_dim1_right, alpha, delta_t, eps, hx_square, hy_square, pi, &partial_field, &partial_field_clipboard);
/*
if(iterations == 2) {
MPI_Finalize();
return 0;
}
*/
// sync edges and communicate completion status to all processes
int all_completed = Sync(cart_comm, completion, completions, cart_cluster_size, matrix_size, sync_requests, neighbor_dim0_left, neighbor_dim0_right, neighbor_dim1_left, neighbor_dim1_right, partial_field, dim1_own_edge_values, dim1_neighbor_egde_values);
if(all_completed) {
......@@ -135,6 +137,7 @@ int main(int argc, char **argv)
printf("total: %.10lf\n", time_end - time_start);
}
// send partial field to root so it can be saved
Send_To_Root(cart_comm, rank, dim0_size, dim1_size, cart_cluster_size, matrix_size, infos, partial_field, root_field);
MPI_Finalize();
......
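The loop above combines a local convergence test (Jacobi_Iterate's return value) with a global decision made in Sync, which gathers a per-rank completions array. As a hedged sketch only, the same decision could be expressed with a single MPI_Allreduce over a logical AND; the names here are hypothetical and this is not the project's implementation.

#include <mpi.h>

// Returns 1 on every rank once all ranks report local convergence (sketch).
static int all_converged(MPI_Comm cart_comm, double maxdiff, double eps) {
    int local_done = (maxdiff < eps);
    int all_done = 0;
    MPI_Allreduce(&local_done, &all_done, 1, MPI_INT, MPI_LAND, cart_comm);
    return all_done;
}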
......@@ -3,10 +3,6 @@ all: main
main: matrix.c main.c args.c pid0.c pi.c cart.c mpi_util.c
mpicc matrix.c main.c args.c pid0.c pi.c cart.c mpi_util.c jacobi.c -fopenmp -std=c99 -lm -o main
ViewMatrix.class: ViewMatrix.java
javac ViewMatrix.java
clean:
rm -f *.o *.c~ heat Matrix.txt
rm -f ViewMatrix.class
rm -f *.o *.c~ main Matrix.txt
\ No newline at end of file
......@@ -95,6 +95,8 @@ void Init_Matrix(double **a, int m, int n, int init_value) {
}
}
// insert b in a
// offsets allow cutting the edges off b
void Insert_Matrix(double **a, int a_dim0, int a_dim1, int pos_dim0, int pos_dim1, double **b, int b_dim0, int b_dim1, int offset_dim0_left, int offset_dim0_right, int offset_dim1_left, int offset_dim1_right) {
int i,j;
for(i=0; i < b_dim0 - offset_dim0_left - offset_dim0_right; i++) {
......@@ -104,6 +106,7 @@ void Insert_Matrix(double **a, int a_dim0, int a_dim1, int pos_dim0, int pos_dim
}
}
// same as Insert_Matrix but b is a flat array (no double index)
void Insert_Array_In_Matrix(double **a, int a_dim0, int a_dim1, int pos_dim0, int pos_dim1, double *b, int b_dim0, int b_dim1, int offset_dim0_left, int offset_dim0_right, int offset_dim1_left, int offset_dim1_right) {
int i,j;
for(i=0; i < b_dim0 - offset_dim0_left - offset_dim0_right; i++) {
......
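The offset parameters let the caller drop the halo (ghost) rows and columns when a partial field is pasted back into the full matrix. A self-contained sketch of the flat-array variant with the same loop structure as above; the function and parameter names are illustrative, not the project's exact signature.

// Copy the flat, row-major array b (b_dim0 x b_dim1) into a at (pos_dim0, pos_dim1),
// dropping off_* rows/columns from each side of b (e.g. 1 to cut the halo).
void insert_array_in_matrix_sketch(double **a, int pos_dim0, int pos_dim1,
                                   const double *b, int b_dim0, int b_dim1,
                                   int off0_left, int off0_right,
                                   int off1_left, int off1_right) {
    for (int i = 0; i < b_dim0 - off0_left - off0_right; i++) {
        for (int j = 0; j < b_dim1 - off1_left - off1_right; j++) {
            a[pos_dim0 + i][pos_dim1 + j] = b[(i + off0_left) * b_dim1 + (j + off1_left)];
        }
    }
}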
......@@ -3,6 +3,7 @@
#include <stdlib.h>
#include "project.h"
// create an MPI type to use as the datatype for send and recv
void Create_MPI_Type_t_process_info(MPI_Datatype *datatype) {
t_process_info mock;
MPI_Datatype type[7] = {MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT};
......@@ -54,18 +55,16 @@ int Sync(MPI_Comm cart_comm, int completion, int *completions, int cart_cluster_
}
// Sync edges
// copy own edges in send buffer
// dim0 edges are available as a single block and can be copied directly
if(neighbor_dim0_left != MPI_PROC_NULL) {
MPI_Isend(&(partial_field[1][1]), (matrix_size[1] - 2), MPI_DOUBLE, neighbor_dim0_left, 0, cart_comm, &(sync_requests[0]));
MPI_Irecv(&(partial_field[0][1]), (matrix_size[1] - 2), MPI_DOUBLE, neighbor_dim0_left, 0, cart_comm, &(sync_requests[4]));
//memcpy(&(dim1_own_edge_values[2*(matrix_size[0]-2)]), &(partial_field[1][1]), sizeof(double) * (matrix_size[1] - 2));
}
if(neighbor_dim0_right != MPI_PROC_NULL) {
MPI_Isend(&(partial_field[matrix_size[0]-2][1]), (matrix_size[1] - 2), MPI_DOUBLE, neighbor_dim0_right, 0, cart_comm, &(sync_requests[1]));
MPI_Irecv(&(partial_field[matrix_size[0]-1][1]), (matrix_size[1] - 2), MPI_DOUBLE, neighbor_dim0_right, 0, cart_comm, &(sync_requests[5]));
//memcpy(&(dim1_own_edge_values[2*(matrix_size[0]-2) + matrix_size[1] - 2]), &(partial_field[matrix_size[0]-2][1]), sizeof(double) * (matrix_size[1] - 2));
}
// dim1 edges need to be copied into a send/recv buffer
if(neighbor_dim1_left != MPI_PROC_NULL) {
for(int i = 0; i < matrix_size[0] - 2; i++) {
dim1_own_edge_values[i] = partial_field[i+1][1];
......@@ -82,17 +81,20 @@ int Sync(MPI_Comm cart_comm, int completion, int *completions, int cart_cluster_
MPI_Irecv(&(dim1_neighbor_egde_values[matrix_size[0]-2]), matrix_size[0] -2, MPI_DOUBLE, neighbor_dim1_right, 0, cart_comm, &(sync_requests[7]));
}
// wait for all to complete
while(1) {
int current;
MPI_Waitany(9, sync_requests, &current, MPI_STATUS_IGNORE);
if(current == MPI_UNDEFINED) {
break;
}
// received dim1 edge from the left neighbor
if(current == 6) {
for(int i = 0; i < matrix_size[0] - 2; i++) {
partial_field[i+1][0] = dim1_neighbor_egde_values[i];
}
}
// received dim1 edge from the right neighbor
if(current == 7) {
int right_edge_index = matrix_size[1] - 1;
for(int i = 0; i < matrix_size[0] - 2; i++) {
......
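Unlike the contiguous dim0 rows, the dim1 columns have to be packed into a buffer before sending and unpacked after MPI_Waitany reports the matching receive as complete. A reduced sketch of that pattern for the left neighbor only; the real Sync handles four neighbors and nine requests, and the request slots here are assumed to be pre-set to MPI_REQUEST_NULL so MPI_Waitany returns MPI_UNDEFINED once nothing is pending.

#include <mpi.h>

void exchange_left_column_sketch(MPI_Comm cart_comm, int neighbor_dim1_left,
                                 double **partial_field, int rows,
                                 double *send_buf, double *recv_buf,
                                 MPI_Request *reqs /* 2 slots, pre-set to MPI_REQUEST_NULL */) {
    if (neighbor_dim1_left != MPI_PROC_NULL) {
        for (int i = 0; i < rows - 2; i++)
            send_buf[i] = partial_field[i + 1][1];       // pack own left edge column
        MPI_Isend(send_buf, rows - 2, MPI_DOUBLE, neighbor_dim1_left, 0, cart_comm, &reqs[0]);
        MPI_Irecv(recv_buf, rows - 2, MPI_DOUBLE, neighbor_dim1_left, 0, cart_comm, &reqs[1]);
    }
    while (1) {                                          // drain until nothing is pending
        int current;
        MPI_Waitany(2, reqs, &current, MPI_STATUS_IGNORE);
        if (current == MPI_UNDEFINED)
            break;
        if (current == 1)                                // receive finished: unpack ghost column
            for (int i = 0; i < rows - 2; i++)
                partial_field[i + 1][0] = recv_buf[i];
    }
}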
......@@ -13,6 +13,7 @@ t_process_info* Gather_Process_Info(t_process_info *pi, int rank, int cluster_si
infos = malloc(sizeof(t_process_info) * cluster_size);
}
// root receives all pi's in infos
if(MPI_Gather(pi, 1, MPI_process_info, infos, 1, MPI_process_info, 0, cart_comm)) {
fprintf(stderr, "Gather failed\n");
exit(1);
......@@ -21,8 +22,12 @@ t_process_info* Gather_Process_Info(t_process_info *pi, int rank, int cluster_si
return infos;
}
void Send_To_Root(MPI_Comm cart_comm, int rank, int dim0_size, int dim1_size, int cart_cluster_size, int *matrix_size, t_process_info *infos, double **partial_field, double **root_field) {
MPI_Request send_request;
// every process (including root) sends to root
// a gather is not suitable because the received data needs further processing and the block size differs between processes
// the send needs to be asynchronous, otherwise root would deadlock
MPI_Isend(partial_field[0], matrix_size[0]*matrix_size[1], MPI_DOUBLE, 0, 0, cart_comm, &send_request);
if(rank == 0) {
MPI_Request *requests = malloc(sizeof(MPI_Request) * cart_cluster_size);
......@@ -41,6 +46,7 @@ void Send_To_Root(MPI_Comm cart_comm, int rank, int dim0_size, int dim1_size, in
for(int i = 0; i < cart_cluster_size; i++) {
int current;
MPI_Waitany(cart_cluster_size, requests, &current, MPI_STATUS_IGNORE);
// processes send their neighbor (ghost) edges as well => offset of 1 cuts them off
Insert_Array_In_Matrix(
root_field,
dim0_size,
......@@ -57,5 +63,7 @@ void Send_To_Root(MPI_Comm cart_comm, int rank, int dim0_size, int dim1_size, in
free(allocation);
Write_Matrix(root_field, dim0_size, dim1_size);
}
// wait for the local send
// required, otherwise the process could terminate before all data has been sent to root
MPI_Wait(&send_request, MPI_STATUS_IGNORE);
}
\ No newline at end of file
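The comments in Send_To_Root explain why a plain MPI_Gather is avoided: the received blocks need per-rank post-processing and their sizes differ, so every rank (root included) posts a non-blocking send and root drains the matching receives. Below is a hedged sketch of that pattern with uniform block sizes; the real code sizes each receive from the infos array and inserts the data via Insert_Array_In_Matrix.

#include <mpi.h>
#include <stdlib.h>

// Every rank Isends its block to rank 0; rank 0 receives one block per rank,
// waits on them with MPI_Waitany, and every rank finally waits on its own send.
void send_blocks_to_root_sketch(MPI_Comm comm, int rank, int nprocs,
                                double *block, int block_len, double *recv_buf) {
    MPI_Request send_request;
    MPI_Isend(block, block_len, MPI_DOUBLE, 0, 0, comm, &send_request);

    if (rank == 0) {
        MPI_Request *requests = malloc(sizeof(MPI_Request) * nprocs);
        for (int src = 0; src < nprocs; src++)
            MPI_Irecv(recv_buf + (size_t)src * block_len, block_len, MPI_DOUBLE,
                      src, 0, comm, &requests[src]);
        for (int done = 0; done < nprocs; done++) {
            int current;
            MPI_Waitany(nprocs, requests, &current, MPI_STATUS_IGNORE);
            // the block from rank 'current' is now in recv_buf; post-process here
        }
        free(requests);
    }
    // wait for the local send; otherwise a rank could exit before delivering its data
    MPI_Wait(&send_request, MPI_STATUS_IGNORE);
}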
#include <mpi.h>
typedef struct {
int rank;
int coord0;
int coord1;
int rank; // MPI rank
int coord0; // coordinate in the MPI cart grid (dim 0)
int coord1; // coordinate in the MPI cart grid (dim 1)
int start_m; // first tile to process in X-axis
int start_n; // first tile to process in Y-axis
int end_m; // last tile to process in X-axis
int end_n; // last tile to process in Y-axis
int end_m; // last (inclusive) tile to process in X-axis
int end_n; // last (inclusive) tile to process in Y-axis
} t_process_info;
// matrix.c
......
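Since t_process_info consists of seven ints, the derived datatype built in Create_MPI_Type_t_process_info can be sketched with offsetof-based displacements. The hunk above only shows the type array and a mock instance, so the following is an equivalent construction under that assumption, not the original function body.

#include <mpi.h>
#include <stddef.h>
#include "project.h"

void create_mpi_type_t_process_info_sketch(MPI_Datatype *datatype) {
    int          blocklen[7] = {1, 1, 1, 1, 1, 1, 1};
    MPI_Datatype types[7]    = {MPI_INT, MPI_INT, MPI_INT, MPI_INT,
                                MPI_INT, MPI_INT, MPI_INT};
    MPI_Aint     disps[7]    = {
        offsetof(t_process_info, rank),
        offsetof(t_process_info, coord0),
        offsetof(t_process_info, coord1),
        offsetof(t_process_info, start_m),
        offsetof(t_process_info, start_n),
        offsetof(t_process_info, end_m),
        offsetof(t_process_info, end_n),
    };
    MPI_Type_create_struct(7, blocklen, disps, types, datatype);
    MPI_Type_commit(datatype);
}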