Отчёт_неявная_схема (Весенний семестр), страница 4
Описание файла
Файл "Отчёт_неявная_схема" внутри архива находится в папке "Весенний семестр". PDF-файл из архива "Весенний семестр", который расположен в категории "". Всё это находится в предмете "параллельные методы решения задач" из 10 семестр (2 семестр магистратуры), которые можно найти в файловом архиве МГУ им. Ломоносова. Не смотря на прямую связь этого архива с МГУ им. Ломоносова, его также можно найти и в других разделах. .
Просмотр PDF-файла онлайн
Текст 4 страницы из PDF
That means iteration is over.// --------------------------------------------------------------// ---------------------------------------------------------------19}// Output results to a file.void Trans_solver::Output(const int mode) {std::fstream out_file;if (mode == 1) { // Use stdout.for (int i = 0; i < field_width; ++i) {for (int j = 0; j < field_length; ++j) {std::cout << array_data[i * field_length + j] << " ";}}}elseif (mode == 2) { // Use fstream.out_file.open("std_out.txt", std::ios::out);for (int i = 0; i < field_width; ++i) {for (int j = 0; j < field_length; ++j) {out_file << array_data[i * field_length + j] << " ";}out_file << "\n";}out_file.close();}}// Default destructor.Trans_solver::~Trans_solver(){// Delete field data.if (array_data != NULL) { delete array_data; }if (dub_array != NULL) { delete dub_array; }//ififififDelete border data.(upper_border != NULL) { delete upper_border; }(lower_border != NULL) { delete lower_border; }(left_border != NULL) { delete left_border; }(right_border != NULL) { delete right_border; }// Delete coefficients data.if (F_coeff_x_axis != NULL) { delete F_coeff_x_axis; }if (F_coeff_y_axis != NULL) { delete F_coeff_y_axis; }}// Basic#include#include#includeinclusions.<cstdlib><cstdio><iostream>// Mpi inclusions.#include "mpi.h"// Using directives.using namespace std;// Project inclusions.20#include "Trans_solver.cpp"// Main function :)// ------------------------------------------------------------------------------int main(int argc, char* argv[]) {// MPI rank & size variables.int p_rank, p_size;// Command line arguments.int data_length = atoi(argv[1]);int data_width = atoi(argv[2]);int iter_num = atoi(argv[3]);int calc_mode = atoi(argv[4]);int out_mode = atoi(argv[5]);// Initialize MPI.MPI_Init(&argc, &argv);MPI_Comm_rank(MPI_COMM_WORLD, &p_rank);MPI_Comm_size(MPI_COMM_WORLD, &p_size);// Time parameters.double start_time;double end_time;Trans_solver Eq_solver; // Create a solver.Eq_solver.Init(data_length, data_width, p_rank, p_size); // Initialize solver.Eq_solver.Fill_data(0); // Load test data to solver.start_time = MPI_Wtime();// Perform as many iterations as needed.if (calc_mode == 0) { // Calculation mode.for (int i = 0; i < iter_num; ++i) {Eq_solver.Iterate(i);}}else if (calc_mode == 1) { // Generate test solutionEq_solver.Fill_data(iter_num);}MPI_Barrier(MPI_COMM_WORLD);end_time = MPI_Wtime();double total_time = end_time - start_time;if (out_mode == 0) { // Print time.cout << "Calculation time is : " << total_time << "\n";}else { // VerifyEq_solver.Output(out_mode);}// Finalize MPI.MPI_Finalize();21// Return :)return 0;}10.Приложение 2.
Параллельная версия программы.Параллельная версия программы отличается от последовательной реализациейфункции вычисления шага по времени. Нужно обратить внимание наограниченность использования openmp – использовать openmp в самом внешнемцикле «нехорошо», поскольку внутри цикла происходит взаимодействие узловчерез MPI. Из трёх крупных внутренних циклов распараллеливается через openmpтолько первый, т. к. в остальных двух имеется строго последовательнаязависимость по данным.
Ещё openmp используется при обработке «глобальной»матрицы.// ------------------------------------------------------------------------------// This file contains class to solve given transcalency equations.// ------------------------------------------------------------------------------#define _USE_MATH_DEFINES// MPI inclusions.#include <mpi.h>#include <omp.h>// Basic#include#include#include#include#includeinclusions.<cstdlib><cstdio><iostream><fstream><cmath>// Definitions#define default_dims 2// Material is considered to be stainless steel ( coeffs are added for FASTcalc ).#define DEFAULT_LAMBDA 1.0 // Lambda.#define DEFAULT_C 1.0 // C.#define DEFAULT_RO 1.0 // Ro.// Timestep is considered 0.01 second.#define DEFAULT_T_STEP 0.01// Default grid steps are considered M_PI both for x and y axis.#define DEFAULT_X_STEP M_PI#define DEFAULT_Y_STEP M_PI// MPI_Sorter class is used to implement a sorting network to sort array fragments.
Onlydeclaration is here.// ------------------------------------------------------------------------------class Trans_solver{private:22// Global data parameters.int global_length;int global_width;// Step parameters.double x_step;double y_step;double exponential_coeff;// MPI_Cart parameters.int p_rank;int n_dims;int* coords;int* dub_coords;int* dim_sizes;int* periods;MPI_Comm cart_comm;// MPI_Cart send_recv parameters.int shift_source, shift_dest;MPI_Status status; // Status of MPI messages received.MPI_Request s_request; // Send request for MPI messages.MPI_Request r_request; // Receive request for MPI messages.// Data field parameters.int field_length; // Data field length.int field_width; // Data field width.double* array_data; // A field of dots calculated.double* dub_array; // A field of dots calculated duplicate.// Border parameters.double* upper_border; // Border data :)double* lower_border; // Border data :)double* left_border; // Border data :)double* right_border; // Border data :)double border_value; // Default border value.// Tridiagonal equation system coefficients for horizontal solution step.double* alpha_coeff_y_axis;double* charlie_coeff_y_axis;double* beta_coeff_y_axis;double* alpha_coeff_x_axis;double* charlie_coeff_x_axis;double* beta_coeff_x_axis;// Auxillary data fields for matrix operations.double* left_data_buffer;double* right_data_buffer;double* up_data_buffer;double* down_data_buffer;23// Equation coefficients Ay(i-1) - Cy(i) + By(i+1) = - Fdouble A_coeff_x_axis;double B_coeff_x_axis;double C_coeff_x_axis;double A_coeff_y_axis;double B_coeff_y_axis;double C_coeff_y_axis;double* F_coeff_x_axis;double* F_coeff_y_axis;// Data buffers for upper line in reversal step of the method.double appendix; // Additional value used in forming the global matrix.double auxillary; // Data element from previous process - used in the follower.double* send_buf;double* recv_buf;// Global matrix used in the final step of the method.double* global_x_array;double* global_y_array;double* global_x_mtrx;double* global_y_mtrx;//This internal function solves a simple version of original equaition.void simple_solve(double* global_mtrx, double* global_array, const int length);// This internal function is used to simulate heat generators.double generate_heat(const int iteration_number, const int x_pos, const inty_pos);public:// Constructor.Trans_solver();void Init(int data_length, int data_width, int p_rank, int p_size); // Initializesolver with processor cart.void Fill_data(const int iter_num); // Fill solver with test source data.void Iterate(const int iter_num); // Perform a local iteration.void Output(const int mode); // Output data.// Destructor.virtual ~Trans_solver();};// Project inclusions.#include "Trans_solver.h"// ------------------------------------------------------------------------------// This file contains functions used in trans_solver class// ------------------------------------------------------------------------------// ------------------------------------------------------------------------------// -------------------------------------------------------------------------------24//This internal function solves a simple version of original equaition.void Trans_solver::simple_solve(double* global_mtrx, double* global_array, const intlength) {double global_subtract_coeff_1;double global_subtract_coeff_2;int upper = 0;int lower = length - 1;int i = 0;int j = 0;// Straight walk.#pragma omp parallel private(upper, lower) num_threads(2){if (omp_get_thread_num() == 0) {for (i = 1; i < length / 2; ++i) {global_subtract_coeff_1 = global_mtrx[4 * i] /global_mtrx[4 * (i - 1) + 1];global_mtrx[4 * i] -= global_mtrx[4 * (i - 1) + 1] *global_subtract_coeff_1;global_mtrx[4 * i + 1] -= global_mtrx[4 * (i - 1) + 2]* global_subtract_coeff_1;global_mtrx[4 * i + 3] -= global_mtrx[4 * (i - 1) + 3]* global_subtract_coeff_1;upper++;}}else {for (j = length - 2; j >= length / 2; --j) {global_subtract_coeff_2 = global_mtrx[4 * j + 2] /global_mtrx[4 * (j + 1) + 1];global_mtrx[4 * j + 1] -= global_mtrx[4 * (j + 1)] *global_subtract_coeff_2;global_mtrx[4 * j + 2] -= global_mtrx[4 * (j + 1) + 1]* global_subtract_coeff_2;global_mtrx[4 * j + 3] -= global_mtrx[4 * (j + 1) + 3]* global_subtract_coeff_2;lower--;}}}// Synchronise.global_subtract_coeff_1 = global_mtrx[4 * (upper + 1)] / global_mtrx[4 * (upper)+ 1];global_mtrx[4 * (upper + 1)] -= global_mtrx[4 * (upper)+1] *global_subtract_coeff_1;global_mtrx[4 * (upper + 1) + 1] -= global_mtrx[4 * (upper)+2] *global_subtract_coeff_1;global_mtrx[4 * (upper + 1) + 3] -= global_mtrx[4 * (upper)+3] *global_subtract_coeff_1;25global_subtract_coeff_2 = global_mtrx[4 * (lower - 1) + 2] / global_mtrx[4 *(lower) + 1];global_mtrx[4 * (lower - 1) + 1] -= global_mtrx[4 * (lower)] *global_subtract_coeff_2;global_mtrx[4 * (lower - 1) + 2] -= global_mtrx[4 * (lower)+1] *global_subtract_coeff_2;global_mtrx[4 * (lower - 1) + 3] -= global_mtrx[4 * (lower)+3] *global_subtract_coeff_2;#pragma omp parallel num_threads(2){if (omp_get_thread_num() == 0) {// Reverse walk.for (int p = i + 1; p < length; ++p) {global_subtract_coeff_1 = global_mtrx[4 * p] /global_mtrx[4 * (p - 1) + 1];global_mtrx[4 * p] -= global_mtrx[4 * (p - 1) + 1] *global_subtract_coeff_1;global_mtrx[4 * p + 1] -= global_mtrx[4 * (p - 1) + 2]* global_subtract_coeff_1;global_mtrx[4 * p + 3] -= global_mtrx[4 * (p - 1) + 3]* global_subtract_coeff_1;}} else {// Reverse walk.for (int q = j - 1; q >= 0; --q) {global_subtract_coeff_2 = global_mtrx[4 * q + 2] /global_mtrx[4 * (q + 1) + 1];global_mtrx[4 * q + 1] -= global_mtrx[4 * (q + 1)] *global_subtract_coeff_2;global_mtrx[4 * q + 2] -= global_mtrx[4 * (q + 1) + 1]* global_subtract_coeff_2;global_mtrx[4 * q + 3] -= global_mtrx[4 * (q + 1) + 3]* global_subtract_coeff_2;}}}// Solutions, finally !.for (int m = 0; m < length; ++m) {global_array[m] = global_mtrx[4 * m + 3] / global_mtrx[4 * m + 1];}}// This internal function is used to simulate heat generators.double Trans_solver::generate_heat(const int iter_num, const int x_pos, const int y_pos) {// This function can give an additional heat source to any cell in the field.// As you can see, it depends on coordinates and iteration number.// So it's basically f(r,t).// For test purposes it's considered a simple given function, nondependant ofposition// This doesn't change any of calculation principles, since these additional heatsources always provide a known value.26// Actual function.//double result = (double)iter_num * DEFAULT_T_STEP + DEFAULT_T_STEP / 2.0;//return 0.2 * cos(((double)result / DEFAULT_T_STEP) / 20.0);// Test variant for checking.return 0.0;}// ------------------------------------------------------------------------------// ------------------------------------------------------------------------------// Default constructor.Trans_solver::Trans_solver() {}// Initialize solver ( make a cart of processes ).void Trans_solver::Init(int data_length, int data_width, int p_rank, int p_size) {// Set up global field parameters.global_length = data_length;global_width = data_width;// Allocate memory for cart parameters.n_dims = default_dims; // Number of dimensions - 2 by default.dim_sizes = new int[n_dims];periods = new int[n_dims];coords = new int[n_dims];dub_coords = new int[n_dims];// Calculate cart parameters and initialize cart.this->p_rank = p_rank;int cart_size = static_cast<int>(trunc(sqrt(p_size))); // HERE SHOULD BE A CHECKWITH AN EXCEPTION !!!for (int i = 0; i < n_dims; ++i) {dim_sizes[i] = cart_size;periods[i] = 1; // The cart is periodic.}MPI_Cart_create(MPI_COMM_WORLD, default_dims, dim_sizes, periods, false,&cart_comm);// Calculate field parameters and initialize field data.field_length = ((data_length % cart_size == 0) * data_length + (data_length %cart_size != 0) * (data_length + cart_size - data_length % cart_size)) / cart_size;field_width = ((data_width % cart_size == 0) * data_width + (data_width %cart_size != 0) * (data_width + cart_size - data_width % cart_size)) / cart_size;array_data = new double[field_length * field_width];dub_array = new double[field_length * field_width];// Allocate memory for border fields.// Border values will be set later during data filling.upper_border = new double[field_length];lower_border = new double[field_length];left_border = new double[field_width];right_border = new double[field_width];27// Allocate memory for tridiagonal coefficients.alpha_coeff_y_axis = new double[field_width];charlie_coeff_y_axis = new double[field_width];beta_coeff_y_axis = new double[field_width];alpha_coeff_x_axis = new double[field_length];charlie_coeff_x_axis = new double[field_length];beta_coeff_x_axis = new double[field_length];// Allocate memory for auxillary data fields used in matrix operations.left_data_buffer = new double[field_width];right_data_buffer = new double[field_width];up_data_buffer = new double[field_length];down_data_buffer = new double[field_length];// Calculate step sizes.x_step = (2.0 * DEFAULT_X_STEP) / (double)(data_length - 1);y_step = (2.0 * DEFAULT_Y_STEP) / (double)(data_width - 1);// Calculate static equation coefficients.A_coeff_x_axis = DEFAULT_LAMBDA / (x_step * x_step);B_coeff_x_axis = DEFAULT_LAMBDA / (x_step * x_step);C_coeff_x_axis = -2.0 * (DEFAULT_LAMBDA / (x_step * x_step)) - (DEFAULT_RO *DEFAULT_C) / DEFAULT_T_STEP;A_coeff_y_axis = DEFAULT_LAMBDA / (y_step * y_step);B_coeff_y_axis = DEFAULT_LAMBDA / (y_step * y_step);C_coeff_y_axis = -2.0 * (DEFAULT_LAMBDA / (y_step * y_step)) - (DEFAULT_RO *DEFAULT_C) / DEFAULT_T_STEP;// Allocate memory for equation coefficients.F_coeff_x_axis = new double[field_length];F_coeff_y_axis = new double[field_width];// Allocate memory for send and receive buffers.send_buf = new double[4];recv_buf = new double[4];// Allocate memory for global matrix.global_x_array = new double[dim_sizes[0]];global_y_array = new double[dim_sizes[1]];global_x_mtrx = new double[4 * dim_sizes[0]];global_y_mtrx = new double[4 * dim_sizes[1]];}// Fill solver with test source data.void Trans_solver::Fill_data(const int iter_num) {// Cart coordinates are required to set data appropriately.MPI_Cart_coords(cart_comm, p_rank, n_dims, coords);// Global offsets.int x_offset = field_length * coords[0];int y_offset = field_width * coords[1];28exponential_coeff = pow(M_E, -(iter_num) * DEFAULT_T_STEP);// Formulae used is sqrt((x - global_length/2)(y - global_width/2)) +sqrt((global_length/2)(global_width/2)) + 1// X and Y are global coordinates.