#include <mpi.h> 
#include <stdio.h>
#include <stdlib.h>
#include "project.h"

// Gathers one t_process_info from every process in cart_comm onto rank 0.
//
// pi:           this process's own info; contributed as a single element
// rank:         the caller's rank; only rank 0 allocates a receive buffer
// cluster_size: number of entries allocated on rank 0; should be the number of
//               processes in cart_comm, since each of them contributes one element
// cart_comm:    communicator the gather runs on (root is rank 0)
//
// Returns a malloc'ed array of cluster_size entries on rank 0 (the caller frees it)
// and NULL on every other rank.
// Terminates the program if the allocation or the gather fails.
t_process_info* Gather_Process_Info(t_process_info *pi, int rank, int cluster_size, MPI_Comm cart_comm) {
	// stays NULL on non-root ranks: the receive buffer is only significant at the root,
	// and the caller must not get an indeterminate pointer back
	t_process_info *infos = NULL;
	MPI_Datatype MPI_process_info;

	Create_MPI_Type_t_process_info(&MPI_process_info);

	if(rank == 0) {
		infos = malloc(sizeof *infos * cluster_size);
		if(infos == NULL) {
			fprintf(stderr, "Gather_Process_Info: out of memory\n");
			// the other ranks are about to enter the collective, so take the whole job down
			MPI_Abort(cart_comm, 1);
			// fallback in case MPI_Abort returns
			exit(1);
		}
	}

	// root receives all pi's in infos
	if(MPI_Gather(pi, 1, MPI_process_info, infos, 1, MPI_process_info, 0, cart_comm)) {
		fprintf(stderr, "Gather failed\n");
		exit(1);
	}

	// release the datatype handle created above
	// NOTE(review): assumes Create_MPI_Type_t_process_info builds a fresh datatype on
	// every call (no cached/shared handle) - confirm against its definition
	MPI_Type_free(&MPI_process_info);

	return infos;
}


// Sends this process's partial field to rank 0; on rank 0 additionally receives the
// partial fields of all processes, inserts them into root_field and writes the result.
//
// cart_comm:         communicator used for all sends and receives
// rank:              the caller's rank; rank 0 acts as the collecting root
// dim0_size,
// dim1_size:         dimensions of root_field, forwarded to Insert_Array_In_Matrix and Write_Matrix
// cart_cluster_size: number of entries in infos, i.e. number of messages root waits for
// matrix_size:       matrix_size[0] * matrix_size[1] doubles are sent from partial_field[0]
// infos:             per-process info; only read on rank 0
// partial_field:     local data; partial_field[0] is used as the start of the send buffer
// root_field:        destination matrix; only used on rank 0
//
// Terminates the job if root cannot allocate its receive buffers.
void Send_To_Root(MPI_Comm cart_comm, int rank, int dim0_size, int dim1_size, int cart_cluster_size, int *matrix_size, t_process_info *infos, double **partial_field, double **root_field) {
	MPI_Request send_request;
	// every process (also root) sends to root
	// gather is not suitable because the received data needs to be processed further
	// and the size is not the same for all processes
	// the send needs to be async, otherwise root runs into a deadlock
	// NOTE(review): the send count must match the rows * cols root computes for this
	// process below - confirm against the caller
	MPI_Isend(partial_field[0], matrix_size[0]*matrix_size[1], MPI_DOUBLE, 0, 0, cart_comm, &send_request);
	if(rank == 0) {
		MPI_Request *requests = malloc(sizeof(MPI_Request) * cart_cluster_size);
		double **allocation = malloc(sizeof(double*) * cart_cluster_size);
		if(requests == NULL || allocation == NULL) {
			fprintf(stderr, "Send_To_Root: out of memory\n");
			// peers have sends in flight towards root, so abort the whole job
			MPI_Abort(cart_comm, 1);
			// fallback in case MPI_Abort returns
			exit(1);
		}
		// post one receive per process, each into its own buffer
		for(int i = 0; i < cart_cluster_size; i++) {
			// buffer dimensions of process i; the +3 is taken over unchanged
			// (presumably inclusive range plus one neighbor edge per side - confirm)
			int rows = infos[i].end_m - infos[i].start_m + 3;
			int cols = infos[i].end_n - infos[i].start_n + 3;
			allocation[i] = malloc(sizeof(double) * rows * cols);
			if(allocation[i] == NULL) {
				fprintf(stderr, "Send_To_Root: out of memory\n");
				MPI_Abort(cart_comm, 1);
				// fallback in case MPI_Abort returns
				exit(1);
			}
			MPI_Irecv(allocation[i],
				rows * cols,
				MPI_DOUBLE,
				infos[i].rank,
				MPI_ANY_TAG,
				cart_comm,
				&requests[i]
			);
		}
		// handle the receives in whatever order they complete
		for(int i = 0; i < cart_cluster_size; i++) {
			int current;
			MPI_Waitany(cart_cluster_size, requests, &current, MPI_STATUS_IGNORE);
			int rows = infos[current].end_m - infos[current].start_m + 3;
			int cols = infos[current].end_n - infos[current].start_n + 3;
			// processes send neighbor edges as well => offset: 1
			Insert_Array_In_Matrix(
				root_field,
				dim0_size,
				dim1_size,
				infos[current].start_m,
				infos[current].start_n,
				allocation[current],
				rows,
				cols,
				1, 1, 1, 1);
			free(allocation[current]);
		}
		free(requests);
		free(allocation);
		Write_Matrix(root_field, dim0_size, dim1_size);
	}
	// wait for own send
	// required, otherwise the process terminates before sending all data to root
	MPI_Wait(&send_request, MPI_STATUS_IGNORE);
}