
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h> 
#include <sys/time.h>
#include "project.h"

/*
 * Run-time parameters with their built-in defaults.
 * main() hands the addresses of eps and delta_t to process_args(), so both
 * may be overwritten from the command line.
 */
double eps     = 1.0e-3;	/* tolerance; only passed to process_args() in this file */
double delta_t = 1.0e-6;	/* time step; main() caps it at its convergence bound */
double alpha   = 1.0;		/* coefficient multiplying the discrete Laplacian */

int main(int argc, char **argv)
{

	MPI_Init(&argc, &argv);
	int m, n;
	double **partial_field;
	double start, end;
	t_process_info pi;

	process_args(argc, argv, &m, &n, &eps, &delta_t);

/*
	a = New_Matrix(m, n);
	if (a == NULL) {
		fprintf(stderr, "Can't allocate matrix !\n");
		exit(1);
	}

	Init_Matrix(a, m, n, 0);
*/
	int pid, num_p;
	if(MPI_Comm_rank(MPI_COMM_WORLD, &pid)) {
		fprintf(stderr, "Cannot fetch PID\n");
		exit(1);
	}

	if(MPI_Comm_size(MPI_COMM_WORLD, &num_p)) {
		fprintf(stderr, "Cannot fetch size of cluster\n");		
		exit(1);
	}

	if(pid == 0) {
		printf("number of processes: %d\n", num_p);
	}

	int pro_per_dim[2];
	float temp_f,temp_g;
	temp_g = sqrt((float)(num_p*m)/n);
	temp_f = num_p/temp_g;
	temp_g = floor(temp_g);
	temp_f = floor(temp_f);
	pro_per_dim[0] = (int)temp_f;
	pro_per_dim[1] = (int)temp_g;

	int m_per_pro, n_per_pro;
	m_per_pro = ceil(m/(float)pro_per_dim[1]);
	n_per_pro = ceil(n/(float)pro_per_dim[0]);

	if(pid==0) {
		printf("dim0: %d dim1: %d\n", pro_per_dim[0], pro_per_dim[1]);
		printf("size per pro: %dx%d\n", m_per_pro, n_per_pro);	
	}
	
	int periods[] = {0,0};	// edges are not connected
	MPI_Comm cart_comm;
	if(MPI_Cart_create(MPI_COMM_WORLD, 2, pro_per_dim, periods, 0, &cart_comm)) {
		fprintf(stderr, "Cannot create topology\n");		
		exit(1);
	}
	if(cart_comm == MPI_COMM_NULL) {
		printf("process %d not in use. exiting...\n", pid);
		MPI_Finalize();
		exit(0);
	}
	int coord[2];
	if(MPI_Cart_coords(cart_comm, pid, 2, coord)) {
		fprintf(stderr, "Cannot get coordinates\n");		
		exit(1);
	}

	// calculate own field using coord
	pi.start_m = coord[0] * m_per_pro;
	pi.end_m = (coord[0]+1)*m_per_pro -1;
	pi.start_n = coord[1] * n_per_pro;
	pi.end_n = (coord[0]+1) * n_per_pro -1;
	if(pi.end_m > m - 1) {
		pi.end_m = m - 1;
	}
	if(pi.end_n > n - 1) {
		pi.end_n = n - 1;
	}

	// print own info
	printf("pid(%d,%d) from (%d, %d) to (%d,%d)\n",
		coord[0],
		coord[1],
		pi.start_n,
		pi.start_m,
		pi.end_n,
		pi.end_m
	);

	double delta_a;
	partial_field = New_Matrix(pi.end_m - pi.start_m + 2, pi.end_n - pi.start_n + 2);
	double **partial_field_tmp = New_Matrix(pi.end_m - pi.start_m + 2, pi.end_n - pi.start_n + 2);
	int i,j;
	double **swap;
	double hx = 1.0/(double)m;
	double hy = 1.0/(double)n;
	double hx_square = hx * hx;
	double hy_square = hy * hy;

	double max_delta_t = 0.25*((min(hx,hy))*(min(hx,hy)))/alpha;  /* minimaler Wert f�r Konvergenz */
	if (delta_t > max_delta_t) { 
		delta_t = max_delta_t;
		printf ("Info: delta_t set to %.10lf.\n", delta_t);
	}
	for(i = 1; i < pi.end_m - pi.start_m + 1; i++) {		// catch edges
		for(j = 1; j < pi.end_n - pi.start_n + 1; j++) {	// catch edges
			delta_a = alpha * 
				    ( (partial_field[i+1][j] + partial_field[i-1][j] - 2.0 * partial_field[i][j]) / (hy_square)
					 +(partial_field[i][j-1] + partial_field[i][j+1] - 2.0 * partial_field[i][j]) / (hx_square) );
			delta_a = delta_a * delta_t;
			partial_field_tmp[i][j] = partial_field[i][j] + delta_a;

			if(delta_a > maxdiff)
				maxdiff = delta_a;
		}
	}
	swap = partial_field_tmp;
	partial_field_tmp = partial_field;
	partial_field = swap;
	

	if(pid == 0) {
		Write_Matrix(partial_field, pi.end_m - pi.start_m + 2, pi.end_n - pi.start_n + 2);
	}

	// write neighbar_com function


	//MPI_Gather()
	MPI_Finalize();

	return 0;
}