From 3b840e5e73c309f77734b093ebc8067901076c21 Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Fri, 2 Jan 2026 11:47:52 +0100
Subject: [PATCH 01/14] support large strain

---
 pyfans/micro.cpp | 72 +++++++++++++++++++++++++++++++-----------------
 pyfans/micro.hpp |  7 +++--
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp
index b9f3636..bf7d4a8 100644
--- a/pyfans/micro.cpp
+++ b/pyfans/micro.cpp
@@ -31,14 +31,21 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file)
     // Input file name is hardcoded. TODO: Make it configurable
     reader.ReadInputFile(input_file);
-    reader.ReadMS(3);
-    matmanager = createMaterialManager<3, 6>(reader);
-    solver     = createSolver<3, 6>(reader, matmanager);
+
+    if (reader.strain_type == "small") {
+        matmanager = createMaterialManager<3, 6>(reader);
+        solver     = createSolver<3, 6>(reader, std::get<MaterialManager<3, 6> *>(matmanager));
+    }
+    else {
+        matmanager = createMaterialManager<3, 9>(reader);
+        solver     = createSolver<3, 9>(reader, std::get<MaterialManager<3, 9> *>(matmanager));
+    }
 }
 
 py::dict MicroSimulation::solve(py::dict macro_data, double dt)
 {
+    const bool is_small_strain = std::holds_alternative<MaterialManager<3, 6> *>(matmanager);
     // Time step value dt is not used currently, but is available for future use
 
     // Create a pybind style Numpy array from macro_write_data["micro_vector_data"], which is a Numpy array
     py::array_t<double> strain1 = macro_data["strains1to3"].cast<py::array_t<double>>();
     py::array_t<double> strain2 = macro_data["strains4to6"].cast<py::array_t<double>>();
     py::array_t<double> strain  = merge_arrays(strain1, strain2);
+    if (not is_small_strain) {
+        py::array_t<double> strain3 = macro_data["strains7to9"].cast<py::array_t<double>>();
+        strain                      = merge_arrays(strain, strain3);
+    }
 
     std::vector<double> g0 = std::vector<double>(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector.
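
A quick illustration (not part of the patch): merge_arrays() is used above to
concatenate the 3-component blocks delivered by the Micro Manager into one
strain vector, 6 entries in the small-strain case and 9 in the large-strain
case. Its body is only visible later in this series; a minimal implementation
consistent with this call site could look like the following sketch (assumed,
illustrative only):

    #include <algorithm>
    #include "pybind11/numpy.h"
    namespace py = pybind11;

    py::array_t<double> merge_arrays(py::array_t<double> array1, py::array_t<double> array2)
    {
        // Allocate a flat array that holds both inputs back to back.
        py::array_t<double> merged(array1.size() + array2.size());
        double             *dst = merged.mutable_data();
        std::copy(array1.data(), array1.data() + array1.size(), dst);
        std::copy(array2.data(), array2.data() + array2.size(), dst + array1.size());
        return merged;
    }
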
 
     VectorXd homogenized_stress;
 
-    matmanager->set_gradient(g0);
+    std::visit([&](auto& mm){mm->set_gradient(g0);}, matmanager);
 
-    solver->solve();
+    std::visit([](auto& s){s->solve();}, solver);
 
-    homogenized_stress = solver->get_homogenized_stress();
+    homogenized_stress = std::visit([](auto& s) -> VectorXd { return s->get_homogenized_stress();}, solver);
 
-    auto C = solver->get_homogenized_tangent(pert_param);
+    auto C = std::visit([&](auto& s) -> MatrixXd {return s->get_homogenized_tangent(pert_param);}, solver);
 
     // Convert data to a py::dict again to send it back to the Micro Manager
     py::dict micro_write_data;
 
     // Add stress and stiffness matrix data to Python dict to be returned
-    std::vector<double> stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]};
-    micro_write_data["stresses1to3"] = stress13;
-    std::vector<double> stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]};
-    micro_write_data["stresses4to6"] = stress46;
-    std::vector<double> C_1 = {C(0, 0), C(0, 1), C(0, 2)};
-    micro_write_data["cmat1"] = C_1;
-    std::vector<double> C_2 = {C(0, 3), C(0, 4), C(0, 5)};
-    micro_write_data["cmat2"] = C_2;
-    std::vector<double> C_3 = {C(1, 1), C(1, 2), C(1, 3)};
-    micro_write_data["cmat3"] = C_3;
-    std::vector<double> C_4 = {C(1, 4), C(1, 5), C(2, 2)};
-    micro_write_data["cmat4"] = C_4;
-    std::vector<double> C_5 = {C(2, 3), C(2, 4), C(2, 5)};
-    micro_write_data["cmat5"] = C_5;
-    std::vector<double> C_6 = {C(3, 3), C(3, 4), C(3, 5)};
-    micro_write_data["cmat6"] = C_6;
-    std::vector<double> C_7 = {C(4, 4), C(4, 5), C(5, 5)};
-    micro_write_data["cmat7"] = C_7;
+    if (is_small_strain) {
+        std::vector<double> stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]};
+        micro_write_data["stresses1to3"] = stress13;
+        std::vector<double> stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]};
+        micro_write_data["stresses4to6"] = stress46;
+        std::vector<double> C_1 = {C(0, 0), C(0, 1), C(0, 2)};
+        micro_write_data["cmat1"] = C_1;
+        std::vector<double> C_2 = {C(0, 3), C(0, 4), C(0, 5)};
+        micro_write_data["cmat2"] = C_2;
+        std::vector<double> C_3 = {C(1, 1), C(1, 2), C(1, 3)};
+        micro_write_data["cmat3"] = C_3;
+        std::vector<double> C_4 = {C(1, 4), C(1, 5), C(2, 2)};
+        micro_write_data["cmat4"] = C_4;
+        std::vector<double> C_5 = {C(2, 3), C(2, 4), C(2, 5)};
+        micro_write_data["cmat5"] = C_5;
+        std::vector<double> C_6 = {C(3, 3), C(3, 4), C(3, 5)};
+        micro_write_data["cmat6"] = C_6;
+        std::vector<double> C_7 = {C(4, 4), C(4, 5), C(5, 5)};
+        micro_write_data["cmat7"] = C_7;
+    }
+    else {
+        std::vector<double> stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]};
+        micro_write_data["stresses1to3"] = stress13;
+        std::vector<double> stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]};
+        micro_write_data["stresses4to6"] = stress46;
+        std::vector<double> stress79 = {homogenized_stress[6], homogenized_stress[7], homogenized_stress[8]};
+        micro_write_data["stresses7to9"] = stress79;
+    }
 
     return micro_write_data;
 }
diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp
index b6ebc6c..7eafc2a 100644
--- a/pyfans/micro.hpp
+++ b/pyfans/micro.hpp
@@ -5,6 +5,7 @@
 #pragma once
 #include <iostream>
 #include <vector>
+#include <variant>
 
 #include "pybind11/pybind11.h"
 #include "pybind11/numpy.h" // numpy arrays
@@ -26,7 +27,9 @@ class MicroSimulation {
     int    _sim_id;
     Reader reader;
     // Hardcoding mechanical models because these definitions need information from the input file.
-    MaterialManager<3, 6> *matmanager;
-    Solver<3, 6> *solver;
+    using matmanager_t = std::variant<MaterialManager<3, 6>*, MaterialManager<3, 9>*>;
+    using solver_t = std::variant<Solver<3, 6>*, Solver<3, 9>*>;
+    matmanager_t matmanager;
+    solver_t solver;
     double pert_param = 1e-6; // scalar strain perturbation parameter
 };

From c77b6d575efaddede4f951e49c7474f6c10d1846 Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Fri, 2 Jan 2026 11:52:07 +0100
Subject: [PATCH 02/14] add changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54877af..414e660 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## v0.6.1
 
+- Add pyFANS support for large strain [#124](https://github.com/DataAnalyticsEngineering/FANS/pull/124)
 - Adds support to write integration point data to disk [#117](https://github.com/DataAnalyticsEngineering/FANS/pull/117)
 
 ## v0.6.0

From dd8690c48456ac08b754161aa5928d958be29f8a Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Fri, 2 Jan 2026 12:26:34 +0100
Subject: [PATCH 03/14] fix lock file

---
 pixi.lock | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index a1b4345..86abe46 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -3,8 +3,6 @@ environments:
   dashboard:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
-    options:
-      pypi-prerelease-mode: if-necessary-or-explicit
     packages:
       linux-64:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
@@ -1456,8 +1454,6 @@ environments:
   default:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
-    options:
-      pypi-prerelease-mode: if-necessary-or-explicit
     packages:
       linux-64:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
@@ -2951,8 +2947,6 @@ environments:
   dev:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
-    options:
-      pypi-prerelease-mode: if-necessary-or-explicit
     packages:
       linux-64:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
@@ -6127,9 +6121,6 @@ packages:
   version: 0.6.1
   build: h1235946_0
   subdir: linux-64
-  variants:
-    cxx_compiler: clangxx
-    target_platform: linux-64
   depends:
   - libstdcxx >=15
   - libgcc >=15
   - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_*
   - fftw >=3.3.10,<4.0a0
   - fftw * mpi_openmpi_*
   input:
-    hash: 16dd9c1131985cbcdb3bb37c6672fa20ea368188987b8d42da1b2ebd02bd9fbf
+    hash: 81734781fed742a3ee906fd6767759243525440b3f419cd0622b90e4d0f107fe
     globs: []
 - conda: .
   name: fans
@@ -6171,16 +6162,13 @@ packages:
   version: 0.6.1
   build: hcb8d3e5_0
   subdir: osx-arm64
-  variants:
-    cxx_compiler: clangxx
-    target_platform: osx-arm64
   depends:
   - libcxx >=21
   - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_*
   - fftw >=3.3.10,<4.0a0
   - fftw * mpi_openmpi_*
   input:
-    hash: 16dd9c1131985cbcdb3bb37c6672fa20ea368188987b8d42da1b2ebd02bd9fbf
+    hash: 81734781fed742a3ee906fd6767759243525440b3f419cd0622b90e4d0f107fe
     globs: []
 - conda: https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-mpi_openmpi_h99e62ba_10.conda
   sha256: 59a1fd0daa71bd5529e19b4da8aae2ded4d4ef05e5e31d80c39cbe2fc7664b6a

From b183777b8b90bf0b47de89036617718947f50cfd Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Fri, 2 Jan 2026 17:11:30 +0100
Subject: [PATCH 04/14] fix pyFANS MPI

---
 include/reader.h   | 103 ++++++++++++-----------------
 include/solver.h   | 158 +++++++++++----------------------------------
 include/solverCG.h |   2 +-
 pyfans/micro.cpp   |  10 +++
 src/reader.cpp     |  32 +++++----
 5 files changed, 109 insertions(+), 196 deletions(-)

diff --git a/include/reader.h b/include/reader.h
index 1e62249..be947ae 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -21,6 +21,10 @@ class Reader {
     // Destructor to free allocated memory
     ~Reader();
 
+    // MPI controls
+    bool force_single_rank;
+    MPI_Comm communicator;
+
     // contents of input file:
     char ms_filename[4096]{};    // Name of Micro-structure hdf5 file
     char ms_datasetname[4096]{}; // Absolute path of Micro-structure in hdf5 file
@@ -73,10 +77,10 @@ class Reader {
     void writeData(const char *fieldName, int load_idx, int time_idx, T *data, hsize_t *dims, int ndims);
 
     template <typename T>
-    void writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, const std::vector<int> &extra_dims);
+    void writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, int size);
 
     template <typename T>
-    void WriteSlab(T *data, const std::vector<int> &extra_dims, const char *dset_name);
+    void WriteSlab(T *data, int _howmany, const char *dset_name);
 
     template <typename T>
     void WriteData(T *data, const char *dset_name, hsize_t *dims, int rank);
@@ -157,25 +161,22 @@ void Reader::WriteData(T *data, const char *dset_name, hsize_t *dims, int rank)
 
 // this function has to be here because of the template
 /* ---------------------------------------------------------------------------
- * Write an N-D slab (local_n0 × Ny × Nz × d1 × d2 × ...) from THIS MPI rank
- * into an HDF5 dataset whose global layout on disk is
+ * Write a 4-D slab (local_n0 × Ny × Nz × howmany) from THIS MPI rank into an
+ * HDF5 dataset whose global layout on disk is
  *
- *      Z Y X d1 d2 ...      (i.e. "zyx" + extra dims)
+ *      Z Y X k              (i.e. "zyx" + extra dim k)
  *
  * The caller supplies the data in logical order
- *      X Y Z d1 d2 ... .
+ *      X Y Z k .
  *
- * We therefore transpose the first 3 dimensions once per write call.
+ * We therefore transpose in-memory once per write call.
  * --------------------------------------------------------------------------*/
 template <typename T>
 void Reader::WriteSlab(
-    T *data,                            // in: local slab, layout [X][Y][Z][d1][d2]...
-    const std::vector<int> &extra_dims, // sizes of extra dimensions [d1, d2, ...]
-    const char *dset_name)
+    T *data,       // in: local slab, layout [X][Y][Z][k]
+    int _howmany,  // global size of the 4th axis (k)
+    const char *dset_name)
 {
-    if (extra_dims.empty()) {
-        throw std::invalid_argument("WriteSlab: extra_dims cannot be empty");
-    }
     /*------------------------------------------------------------------*/
     /* 0. map C++ type -> native HDF5 type                               */
     /*------------------------------------------------------------------*/
@@ -197,6 +198,7 @@ void Reader::WriteSlab(
     /* 1. open or create the HDF5 file                                   */
     /*------------------------------------------------------------------*/
     hid_t file_id = results_file_id;
+    hid_t plist_id;
     if (file_id < 0) {
         throw std::runtime_error("WriteSlab: results file is not open");
     }
     void *old_client_data;
 
     /*------------------------------------------------------------------*/
-    /* 2. create the dataset (global dims = Z Y X d1 d2 ...) if needed   */
+    /* 2. create the dataset (global dims =  Z Y X k ) if necessary      */
     /*------------------------------------------------------------------*/
-    const hsize_t Nx = static_cast<hsize_t>(dims[0]);
-    const hsize_t Ny = static_cast<hsize_t>(dims[1]);
-    const hsize_t Nz = static_cast<hsize_t>(dims[2]);
-
-    const int rank = 3 + extra_dims.size();
-    std::vector<hsize_t> dimsf(rank);
-    dimsf[0] = Nz;
-    dimsf[1] = Ny;
-    dimsf[2] = Nx;
-    for (size_t i = 0; i < extra_dims.size(); ++i) {
-        dimsf[3 + i] = static_cast<hsize_t>(extra_dims[i]);
-    }
+    const hsize_t Nx   = static_cast<hsize_t>(dims[0]);
+    const hsize_t Ny   = static_cast<hsize_t>(dims[1]);
+    const hsize_t Nz   = static_cast<hsize_t>(dims[2]);
+    const hsize_t kDim = static_cast<hsize_t>(_howmany);
+
+    const int rank        = 4;
+    hsize_t   dimsf[rank] = {Nz, Ny, Nx, kDim}; /* <<<  Z Y X k  */
 
     safe_create_group(file_id, dset_name);
 
     H5Eset_auto(H5E_DEFAULT, old_func, old_client_data); /* restore */
 
     if (dset_id < 0) {
-        hid_t filespace = H5Screate_simple(rank, dimsf.data(), nullptr);
+        hid_t filespace = H5Screate_simple(rank, dimsf, nullptr);
         dset_id = H5Dcreate2(file_id, dset_name, data_type, filespace,
                              H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         H5Sclose(filespace);
     }
 
     /*------------------------------------------------------------------*/
     /* 3. build FILE hyperslab (slice along file-dim 2 = X axis)         */
     /*------------------------------------------------------------------*/
-    std::vector<hsize_t> fcount(rank);
-    std::vector<hsize_t> foffset(rank);
-
-    fcount[0] = Nz;
-    fcount[1] = Ny;
-    fcount[2] = static_cast<hsize_t>(local_n0);
-    for (size_t i = 0; i < extra_dims.size(); ++i) {
-        fcount[3 + i] = static_cast<hsize_t>(extra_dims[i]);
-    }
-
-    foffset[0] = 0;
-    foffset[1] = 0;
-    foffset[2] = static_cast<hsize_t>(local_0_start);
-    for (size_t i = 0; i < extra_dims.size(); ++i) {
-        foffset[3 + i] = 0;
-    }
+    hsize_t fcount[4]  = {Nz, Ny, static_cast<hsize_t>(local_n0), kDim};
+    hsize_t foffset[4] = {0, 0, static_cast<hsize_t>(local_0_start), 0};
 
     hid_t filespace = H5Dget_space(dset_id);
-    H5Sselect_hyperslab(filespace, H5S_SELECT_SET, foffset.data(), nullptr, fcount.data(), nullptr);
+    H5Sselect_hyperslab(filespace, H5S_SELECT_SET, foffset, nullptr, fcount, nullptr);
 
     /*------------------------------------------------------------------*/
     /* 4.
transpose local slab [X][Y][Z][k] -> [Z][Y][X][k] */ /*------------------------------------------------------------------*/ - const Eigen::Index NxLoc = static_cast(local_n0); - const Eigen::Index NyLoc = static_cast(Ny); - const Eigen::Index NzLoc = static_cast(Nz); - - // Flatten extra dimensions into one for Eigen Tensor (which supports up to rank 4) - size_t extra_size = 1; - for (int d : extra_dims) - extra_size *= d; - const Eigen::Index extraLoc = static_cast(extra_size); - - const size_t slabElems = static_cast(NxLoc * NyLoc * NzLoc * extraLoc); + const Eigen::Index NxLoc = static_cast(local_n0); + const Eigen::Index NyLoc = static_cast(Ny); + const Eigen::Index NzLoc = static_cast(Nz); + const Eigen::Index kLoc = static_cast(kDim); + const size_t slabElems = static_cast(NxLoc * NyLoc * NzLoc * kLoc); T *tmp = static_cast(fftw_malloc(slabElems * sizeof(T))); if (!tmp) { @@ -293,21 +270,21 @@ void Reader::WriteSlab( } Eigen::TensorMap> - input_tensor(data, NxLoc, NyLoc, NzLoc, extraLoc); + input_tensor(data, NxLoc, NyLoc, NzLoc, kLoc); Eigen::TensorMap> - output_tensor(tmp, NzLoc, NyLoc, NxLoc, extraLoc); + output_tensor(tmp, NzLoc, NyLoc, NxLoc, kLoc); - // Permute dimensions: (0,1,2,3) -> (2,1,0,3) swaps X and Z, keeps extra dims + // Permute dimensions: (0,1,2,3) -> (2,1,0,3) swaps X and Z Eigen::array shuffle_dims = {2, 1, 0, 3}; output_tensor = input_tensor.shuffle(shuffle_dims); /*------------------------------------------------------------------*/ /* 5. MEMORY dataspace matches FILE slab exactly */ /*------------------------------------------------------------------*/ - hid_t memspace = H5Screate_simple(rank, fcount.data(), nullptr); + hid_t memspace = H5Screate_simple(4, fcount, nullptr); - hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + plist_id = H5Pcreate(H5P_DATASET_XFER); H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); herr_t status = H5Dwrite(dset_id, data_type, @@ -337,14 +314,14 @@ void Reader::writeData(const char *fieldName, int load_idx, int time_idx, T *dat } template -void Reader::writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, const std::vector &extra_dims) +void Reader::writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, int size) { if (std::find(resultsToWrite.begin(), resultsToWrite.end(), fieldName) == resultsToWrite.end()) { return; } char name[5096]; snprintf(name, sizeof(name), "%s/load%i/time_step%i/%s", dataset_name, load_idx, time_idx, fieldName); - WriteSlab(data, extra_dims, name); + WriteSlab(data, size, name); } #endif diff --git a/include/solver.h b/include/solver.h index cb05d49..e14e33f 100644 --- a/include/solver.h +++ b/include/solver.h @@ -18,6 +18,7 @@ class Solver : private MixedBCController { const int world_rank; const int world_size; + MPI_Comm communicator; const ptrdiff_t n_x, n_y, n_z; // NOTE: the order in the declaration is very important because it is the same order in which the later initialization via member initializer lists takes place @@ -108,6 +109,7 @@ Solver::Solver(Reader &reader, MaterialManager * matmanager(matmgr), world_rank(reader.world_rank), world_size(reader.world_size), + communicator(reader.communicator), n_x(reader.dims[0]), n_y(reader.dims[1]), n_z(reader.dims[2]), @@ -218,8 +220,8 @@ void Solver::CreateFFTWPlans(double *in, fftw_complex *transform // But, according to https://fftw.org/doc/MPI-Plan-Creation.html the BLOCK sizes must be the same: // "These must be the same block sizes as were passed to the corresponding ‘local_size’ function" const ptrdiff_t n[3] = 
{n_x, n_y, n_z}; - planfft = fftw_mpi_plan_many_dft_r2c(rank, n, howmany, iblock, oblock, in, transformed, MPI_COMM_WORLD, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_OUT); - planifft = fftw_mpi_plan_many_dft_c2r(rank, n, howmany, iblock, oblock, transformed, out, MPI_COMM_WORLD, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_IN); + planfft = fftw_mpi_plan_many_dft_r2c(rank, n, howmany, iblock, oblock, in, transformed, communicator, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_OUT); + planifft = fftw_mpi_plan_many_dft_c2r(rank, n, howmany, iblock, oblock, transformed, out, communicator, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_IN); // see https://eigen.tuxfamily.org/dox/group__TutorialMapClass.html#title3 new (&rhat) Map((std::complex *) transformed, local_n1 * n_x * (n_z / 2 + 1) * howmany); @@ -242,7 +244,7 @@ void Solver::compute_residual_basic(RealArray &r_matrix, RealArr // int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, // int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) MPI_Sendrecv(u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; @@ -262,7 +264,7 @@ void Solver::compute_residual_basic(RealArray &r_matrix, RealArr }); MPI_Sendrecv(r + local_n0 * n_y * (n_z + padding) * howmany, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, - buffer_padding, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + buffer_padding, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); RealArray b(buffer_padding, n_z * howmany, n_y, OuterStride<>((n_z + padding) * howmany)); // NOTE: for any padding of more than 2, the buffer_padding has to be extended @@ -427,7 +429,7 @@ double Solver::compute_error(RealArray &r) } double err; - MPI_Allreduce(&err_local, &err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&err_local, &err, 1, MPI_DOUBLE, MPI_MAX, communicator); err_all[iter] = err; double err0 = err_all[0]; @@ -454,37 +456,8 @@ double Solver::compute_error(RealArray &r) template void Solver::postprocess(Reader &reader, int load_idx, int time_idx) { - int n_gp = matmanager->models[0]->n_gp; - - // Check what user requested - auto &results = reader.resultsToWrite; - bool need_stress = std::find(results.begin(), results.end(), "stress") != results.end(); - bool need_stress_gp = std::find(results.begin(), results.end(), "stress_gp") != results.end(); - bool need_strain = std::find(results.begin(), results.end(), "strain") != results.end(); - bool need_strain_gp = std::find(results.begin(), results.end(), "strain_gp") != results.end(); - - bool need_global_avg = std::find(results.begin(), results.end(), "stress_average") != results.end() || - std::find(results.begin(), results.end(), "strain_average") != results.end(); - - bool need_phase_avg = false; - for (int mat_idx = 0; mat_idx < reader.n_mat; ++mat_idx) { - char name[512]; - sprintf(name, "phase_stress_average_phase%d", mat_idx); - if (std::find(results.begin(), results.end(), name) != results.end()) { - need_phase_avg = true; - break; - } - } - - // Determine if we need to 
compute stress/strain at all - bool need_compute = need_stress || need_stress_gp || need_strain || need_strain_gp || need_global_avg || need_phase_avg; - - // Conditional allocation - VectorXd *strain_elem = need_strain ? new VectorXd(local_n0 * n_y * n_z * n_str) : nullptr; - VectorXd *stress_elem = need_stress ? new VectorXd(local_n0 * n_y * n_z * n_str) : nullptr; - VectorXd *strain_gp = need_strain_gp ? new VectorXd(local_n0 * n_y * n_z * n_gp * n_str) : nullptr; - VectorXd *stress_gp = need_stress_gp ? new VectorXd(local_n0 * n_y * n_z * n_gp * n_str) : nullptr; - + VectorXd strain = VectorXd::Zero(local_n0 * n_y * n_z * n_str); + VectorXd stress = VectorXd::Zero(local_n0 * n_y * n_z * n_str); VectorXd stress_average = VectorXd::Zero(n_str); VectorXd strain_average = VectorXd::Zero(n_str); @@ -495,76 +468,38 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ vector phase_counts(n_mat, 0); MPI_Sendrecv(v_u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; int phase_id; - - if (need_compute) { - iterateCubes<0>([&](ptrdiff_t *idx, ptrdiff_t *idxPadding) { - for (int i = 0; i < 8; ++i) { - for (int j = 0; j < howmany; ++j) { - ue(howmany * i + j, 0) = v_u[howmany * idx[i] + j]; - } - } - phase_id = ms[idx[0]]; - - const MaterialInfo &info = matmanager->get_info(phase_id); - - // Temporary storage for element averages - double elem_strain_avg[n_str]; - double elem_stress_avg[n_str]; - - // Compute once - populates internal eps/sigma - info.model->getStrainStress(elem_strain_avg, elem_stress_avg, ue, info.local_mat_id, idx[0]); - - // Store element averages if requested - if (need_stress) { - for (int c = 0; c < n_str; ++c) { - (*stress_elem)(idx[0] * n_str + c) = elem_stress_avg[c]; - } - } - if (need_strain) { - for (int c = 0; c < n_str; ++c) { - (*strain_elem)(idx[0] * n_str + c) = elem_strain_avg[c]; - } + iterateCubes<0>([&](ptrdiff_t *idx, ptrdiff_t *idxPadding) { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < howmany; ++j) { + ue(howmany * i + j, 0) = v_u[howmany * idx[i] + j]; } + } + phase_id = ms[idx[0]]; - // Copy all GP data if requested - if (need_stress_gp) { - memcpy(&(*stress_gp)(idx[0] * n_gp * n_str), - info.model->get_sigma_data(), - n_gp * n_str * sizeof(double)); - } - if (need_strain_gp) { - memcpy(&(*strain_gp)(idx[0] * n_gp * n_str), - info.model->get_eps_data(), - n_gp * n_str * sizeof(double)); - } + const MaterialInfo &info = matmanager->get_info(phase_id); + info.model->getStrainStress(strain.segment(n_str * idx[0], n_str).data(), stress.segment(n_str * idx[0], n_str).data(), ue, info.local_mat_id, idx[0]); + stress_average += stress.segment(n_str * idx[0], n_str); + strain_average += strain.segment(n_str * idx[0], n_str); - // Accumulate for global and phase averages - if (need_global_avg || need_phase_avg) { - for (int c = 0; c < n_str; ++c) { - stress_average(c) += elem_stress_avg[c]; - strain_average(c) += elem_strain_avg[c]; - phase_stress_average[phase_id](c) += elem_stress_avg[c]; - phase_strain_average[phase_id](c) += elem_strain_avg[c]; - } - phase_counts[phase_id]++; - } - }); - } + phase_stress_average[phase_id] += stress.segment(n_str * idx[0], n_str); + phase_strain_average[phase_id] += 
strain.segment(n_str * idx[0], n_str); + phase_counts[phase_id]++; + }); - MPI_Allreduce(MPI_IN_PLACE, stress_average.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, strain_average.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, stress_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, strain_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); stress_average /= (n_x * n_y * n_z); strain_average /= (n_x * n_y * n_z); // Reduce per-phase accumulations across all processes for (int mat_index = 0; mat_index < n_mat; ++mat_index) { - MPI_Allreduce(MPI_IN_PLACE, phase_stress_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, phase_strain_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &phase_counts[mat_index], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, phase_stress_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, phase_strain_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, &phase_counts[mat_index], 1, MPI_INT, MPI_SUM, communicator); // Compute average for each phase if (phase_counts[mat_index] > 0) { @@ -664,33 +599,16 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ reader.writeData("absolute_error", load_idx, time_idx, err_all.data(), dims, 1); vector rank_field(local_n0 * n_y * n_z, world_rank); - reader.writeSlab("mpi_rank", load_idx, time_idx, rank_field.data(), {1}); - reader.writeSlab("microstructure", load_idx, time_idx, ms, {1}); - reader.writeSlab("displacement_fluctuation", load_idx, time_idx, v_u, {howmany}); - reader.writeSlab("displacement", load_idx, time_idx, u_total.data(), {howmany}); - reader.writeSlab("residual", load_idx, time_idx, v_r, {howmany}); - - if (need_strain) - reader.writeSlab("strain", load_idx, time_idx, strain_elem->data(), {n_str}); - if (need_stress) - reader.writeSlab("stress", load_idx, time_idx, stress_elem->data(), {n_str}); - if (need_strain_gp) - reader.writeSlab("strain_gp", load_idx, time_idx, strain_gp->data(), {n_gp, n_str}); - if (need_stress_gp) - reader.writeSlab("stress_gp", load_idx, time_idx, stress_gp->data(), {n_gp, n_str}); + reader.writeSlab("mpi_rank", load_idx, time_idx, rank_field.data(), 1); + reader.writeSlab("microstructure", load_idx, time_idx, ms, 1); + reader.writeSlab("displacement_fluctuation", load_idx, time_idx, v_u, howmany); + reader.writeSlab("displacement", load_idx, time_idx, u_total.data(), howmany); + reader.writeSlab("residual", load_idx, time_idx, v_r, howmany); + reader.writeSlab("strain", load_idx, time_idx, strain.data(), n_str); + reader.writeSlab("stress", load_idx, time_idx, stress.data(), n_str); matmanager->postprocess(*this, reader, load_idx, time_idx); - // Cleanup - if (strain_elem) - delete strain_elem; - if (stress_elem) - delete stress_elem; - if (strain_gp) - delete strain_gp; - if (stress_gp) - delete stress_gp; - // Compute homogenized tangent only if requested if (find(reader.resultsToWrite.begin(), reader.resultsToWrite.end(), "homogenized_tangent") != reader.resultsToWrite.end()) { homogenized_tangent = get_homogenized_tangent(1e-6); @@ -714,7 +632,7 @@ VectorXd Solver::get_homogenized_stress() homogenized_stress = VectorXd::Zero(n_str); MPI_Sendrecv(v_u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - 
v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; int phase_id; @@ -731,7 +649,7 @@ VectorXd Solver::get_homogenized_stress() homogenized_stress += stress.segment(n_str * idx[0], n_str); }); - MPI_Allreduce(MPI_IN_PLACE, homogenized_stress.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, homogenized_stress.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); homogenized_stress /= (n_x * n_y * n_z); return homogenized_stress; diff --git a/include/solverCG.h b/include/solverCG.h index b02a428..60bc0d6 100644 --- a/include/solverCG.h +++ b/include/solverCG.h @@ -53,7 +53,7 @@ double SolverCG::dotProduct(RealArray &a, RealArray &b) { double local_value = (a * b).sum(); double result; - MPI_Allreduce(&local_value, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_value, &result, 1, MPI_DOUBLE, MPI_SUM, this->communicator); return result; } diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp index bf7d4a8..9658119 100644 --- a/pyfans/micro.cpp +++ b/pyfans/micro.cpp @@ -6,6 +6,7 @@ #include "micro.hpp" #include "setup.h" #include "matmodel.h" +#include "mpi.h" py::array_t merge_arrays(py::array_t array1, py::array_t array2) { @@ -31,7 +32,15 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file) // Input file name is hardcoded. TODO: Make it configurable reader.ReadInputFile(input_file); + + reader.force_single_rank = true; + reader.communicator = MPI_COMM_SELF; + + int rank = -1; + MPI_Comm_rank(reader.communicator, &rank); + reader.ReadMS(3); + printf(">>read input%d id %d\n", rank, sim_id); if (reader.strain_type == "small") { matmanager = createMaterialManager<3, 6>(reader); @@ -41,6 +50,7 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file) matmanager = createMaterialManager<3, 9>(reader); solver = createSolver<3, 9>(reader, std::get*>(matmanager)); } + printf(">>constructed %d id %d\n", rank, sim_id); } py::dict MicroSimulation::solve(py::dict macro_data, double dt) diff --git a/src/reader.cpp b/src/reader.cpp index 288d4af..542127e 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -29,8 +29,8 @@ void Reader::ComputeVolumeFractions() // Find the global maximum and minimum material indices unsigned short global_max, global_min; - MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, MPI_COMM_WORLD); - MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, communicator); + MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, communicator); // Calculate total number of materials n_mat = global_max - global_min + 1; @@ -52,7 +52,7 @@ void Reader::ComputeVolumeFractions() for (int i = 0; i < n_mat; i++) { long vf; - MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, communicator); v_frac[i] = double(vf) / double(dims[0] * dims[1] * dims[2]); if (world_rank == 0) printf("# material %4u vol. frac. 
%10.4f%% \n", @@ -63,15 +63,23 @@ void Reader::ComputeVolumeFractions() void Reader ::ReadInputFile(char input_fn[]) { try { - - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - ifstream i(input_fn); json j; i >> j; inputJson = j; // Store complete input JSON for MaterialManager + if (j.contains("no_mpi")) { + force_single_rank = true; + communicator = MPI_COMM_SELF; + } + else { + force_single_rank = false; + communicator = MPI_COMM_WORLD; + } + + MPI_Comm_rank(communicator, &world_rank); + MPI_Comm_size(communicator, &world_size); + microstructure = j["microstructure"]; strcpy(ms_filename, microstructure["filepath"].get().c_str()); strcpy(ms_datasetname, microstructure["datasetname"].get().c_str()); @@ -222,8 +230,8 @@ void Reader ::ReadMS(int hm) hid_t plist_id; /* property list identifier */ herr_t status; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank(communicator, &world_rank); + MPI_Comm_size(communicator, &world_size); MPI_Info info = MPI_INFO_NULL; // Set up file access property list with parallel I/O access @@ -310,11 +318,11 @@ void Reader ::ReadMS(int hm) ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ */ - alloc_local = fftw_mpi_local_size_many_transposed(rank, n, hm, block0, block1, MPI_COMM_WORLD, &local_n0, &local_0_start, &local_n1, &local_1_start); + alloc_local = fftw_mpi_local_size_many_transposed(rank, n, hm, block0, block1, communicator, &local_n0, &local_0_start, &local_n1, &local_1_start); if (local_n0 < 4) throw std::runtime_error("[ FANS3D_Grid ] ERROR: Number of voxels in x-direction is less than 4 in process " + to_string(world_rank)); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(communicator); hsize_t fcount[3], foffset[3]; if (is_zyx) { /* file layout Z Y X */ @@ -400,7 +408,7 @@ void Reader::OpenResultsFile(const char *output_fn) { std::snprintf(results_filename, sizeof(results_filename), "%s", output_fn); hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); - H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, MPI_INFO_NULL); + H5Pset_fapl_mpio(plist_id, communicator, MPI_INFO_NULL); results_file_id = H5Fcreate(results_filename, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); H5Pclose(plist_id); From a4f508e2fba9c235d8a5a947876107baa958daa0 Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Fri, 2 Jan 2026 17:20:00 +0100 Subject: [PATCH 05/14] applied suggested fix for pixi.lock --- pixi.lock | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/pixi.lock b/pixi.lock index 86abe46..0679bc1 100644 --- a/pixi.lock +++ b/pixi.lock @@ -3,6 +3,8 @@ environments: dashboard: channels: - url: https://conda.anaconda.org/conda-forge/ + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1454,6 +1456,8 @@ environments: default: channels: - url: https://conda.anaconda.org/conda-forge/ + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -2249,7 +2253,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstandard-0.25.0-py314h2e8dab5_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-hbcf94c1_2.conda - conda: . 
- subdir: linux-aarch64 + build: he8cfe8b_0 osx-64: - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-2.25.2-pyhcf101f3_0.conda @@ -2596,7 +2600,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/zstandard-0.25.0-py314hd1e8ddb_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.7-h8210216_2.conda - conda: . - subdir: osx-64 + build: h0dc7051_0 osx-arm64: - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-2.25.2-pyhcf101f3_0.conda @@ -2947,6 +2951,8 @@ environments: dev: channels: - url: https://conda.anaconda.org/conda-forge/ + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -6119,57 +6125,49 @@ packages: - conda: . name: fans version: 0.6.1 - build: h1235946_0 - subdir: linux-64 + build: h0dc7051_0 + subdir: osx-64 + variants: + target_platform: osx-64 depends: - - libstdcxx >=15 - - libgcc >=15 + - libcxx >=21 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 81734781fed742a3ee906fd6767759243525440b3f419cd0622b90e4d0f107fe - globs: [] - conda: . name: fans version: 0.6.1 - build: hbf21a9e_0 - subdir: linux-aarch64 + build: h1235946_0 + subdir: linux-64 depends: - libstdcxx >=15 - libgcc >=15 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 8ad54598e221efef736cec65db9e327856f556e55f81445cda5620e181ef8813 - globs: [] - conda: . name: fans version: 0.6.1 - build: hbf21a9e_0 - subdir: osx-64 + build: hcb8d3e5_0 + subdir: osx-arm64 depends: - libcxx >=21 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 8ad54598e221efef736cec65db9e327856f556e55f81445cda5620e181ef8813 - globs: [] - conda: . 
name: fans version: 0.6.1 - build: hcb8d3e5_0 - subdir: osx-arm64 + build: he8cfe8b_0 + subdir: linux-aarch64 + variants: + target_platform: linux-aarch64 depends: - - libcxx >=21 + - libstdcxx >=15 + - libgcc >=15 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 81734781fed742a3ee906fd6767759243525440b3f419cd0622b90e4d0f107fe - globs: [] - conda: https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-mpi_openmpi_h99e62ba_10.conda sha256: 59a1fd0daa71bd5529e19b4da8aae2ded4d4ef05e5e31d80c39cbe2fc7664b6a md5: 3fcbf2ef5f3a8f0ed5683f60fe08293b From d179d7b0362f429619c53768e927bf352866a117 Mon Sep 17 00:00:00 2001 From: Alex Hocks <73783301+Snapex2409@users.noreply.github.com> Date: Fri, 2 Jan 2026 17:23:20 +0100 Subject: [PATCH 06/14] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 414e660..61fb5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## v0.6.1 -- Add pyFANS support for large strain [#124](https://github.com/DataAnalyticsEngineering/FANS/pull/124) +- Add pyFANS support for large strain and fix pyFANS MPI [#124](https://github.com/DataAnalyticsEngineering/FANS/pull/124) - Adds support to write integration point data to disk [#117](https://github.com/DataAnalyticsEngineering/FANS/pull/117) ## v0.6.0 From 807d347b6726814a2f514c8dce9ab0aca9661064 Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Fri, 2 Jan 2026 17:56:29 +0100 Subject: [PATCH 07/14] undo merge mistake --- include/reader.h | 99 +++++++++++++++++++++++------------- include/solver.h | 130 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 170 insertions(+), 59 deletions(-) diff --git a/include/reader.h b/include/reader.h index be947ae..8b65c7f 100644 --- a/include/reader.h +++ b/include/reader.h @@ -77,10 +77,10 @@ class Reader { void writeData(const char *fieldName, int load_idx, int time_idx, T *data, hsize_t *dims, int ndims); template - void writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, int size); + void writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, const std::vector &extra_dims); template - void WriteSlab(T *data, int _howmany, const char *dset_name); + void WriteSlab(T *data, const std::vector &extra_dims, const char *dset_name); template void WriteData(T *data, const char *dset_name, hsize_t *dims, int rank); @@ -161,22 +161,25 @@ void Reader::WriteData(T *data, const char *dset_name, hsize_t *dims, int rank) // this function has to be here because of the template /* --------------------------------------------------------------------------- - * Write a 4-D slab (local_n0 × Ny × Nz × howmany) from THIS MPI rank into an - * HDF5 dataset whose global layout on disk is + * Write an N-D slab (local_n0 × Ny × Nz × d1 × d2 × ...) from THIS MPI rank + * into an HDF5 dataset whose global layout on disk is * - * Z Y X k (i.e. "zyx" + extra dim k) + * Z Y X d1 d2 ... (i.e. "zyx" + extra dims) * * The caller supplies the data in logical order - * X Y Z k . + * X Y Z d1 d2 ... . * - * We therefore transpose in-memory once per write call. + * We therefore transpose the first 3 dimensions once per write call. * --------------------------------------------------------------------------*/ template void Reader::WriteSlab( - T *data, // in: local slab, layout [X][Y][Z][k] - int _howmany, // global size of the 4th axis (k) - const char *dset_name) + T *data, // in: local slab, layout [X][Y][Z][d1][d2]... 
+ const std::vector &extra_dims, // sizes of extra dimensions [d1, d2, ...] + const char *dset_name) { + if (extra_dims.empty()) { + throw std::invalid_argument("WriteSlab: extra_dims cannot be empty"); + } /*------------------------------------------------------------------*/ /* 0. map C++ type -> native HDF5 type */ /*------------------------------------------------------------------*/ @@ -198,7 +201,6 @@ void Reader::WriteSlab( /* 1. open or create the HDF5 file */ /*------------------------------------------------------------------*/ hid_t file_id = results_file_id; - hid_t plist_id; if (file_id < 0) { throw std::runtime_error("WriteSlab: results file is not open"); } @@ -206,15 +208,20 @@ void Reader::WriteSlab( void *old_client_data; /*------------------------------------------------------------------*/ - /* 2. create the dataset (global dims = Z Y X k ) if necessary */ + /* 2. create the dataset (global dims = Z Y X d1 d2 ...) if needed */ /*------------------------------------------------------------------*/ - const hsize_t Nx = static_cast(dims[0]); - const hsize_t Ny = static_cast(dims[1]); - const hsize_t Nz = static_cast(dims[2]); - const hsize_t kDim = static_cast(_howmany); - - const int rank = 4; - hsize_t dimsf[rank] = {Nz, Ny, Nx, kDim}; /* <<< Z Y X k */ + const hsize_t Nx = static_cast(dims[0]); + const hsize_t Ny = static_cast(dims[1]); + const hsize_t Nz = static_cast(dims[2]); + + const int rank = 3 + extra_dims.size(); + std::vector dimsf(rank); + dimsf[0] = Nz; + dimsf[1] = Ny; + dimsf[2] = Nx; + for (size_t i = 0; i < extra_dims.size(); ++i) { + dimsf[3 + i] = static_cast(extra_dims[i]); + } safe_create_group(file_id, dset_name); @@ -226,7 +233,7 @@ void Reader::WriteSlab( H5Eset_auto(H5E_DEFAULT, old_func, old_client_data); /* restore */ if (dset_id < 0) { - hid_t filespace = H5Screate_simple(rank, dimsf, nullptr); + hid_t filespace = H5Screate_simple(rank, dimsf.data(), nullptr); dset_id = H5Dcreate2(file_id, dset_name, data_type, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5Sclose(filespace); @@ -249,20 +256,40 @@ void Reader::WriteSlab( /*------------------------------------------------------------------*/ /* 3. build FILE hyperslab (slice along file-dim 2 = X axis) */ /*------------------------------------------------------------------*/ - hsize_t fcount[4] = {Nz, Ny, static_cast(local_n0), kDim}; - hsize_t foffset[4] = {0, 0, static_cast(local_0_start), 0}; + std::vector fcount(rank); + std::vector foffset(rank); + + fcount[0] = Nz; + fcount[1] = Ny; + fcount[2] = static_cast(local_n0); + for (size_t i = 0; i < extra_dims.size(); ++i) { + fcount[3 + i] = static_cast(extra_dims[i]); + } + + foffset[0] = 0; + foffset[1] = 0; + foffset[2] = static_cast(local_0_start); + for (size_t i = 0; i < extra_dims.size(); ++i) { + foffset[3 + i] = 0; + } hid_t filespace = H5Dget_space(dset_id); - H5Sselect_hyperslab(filespace, H5S_SELECT_SET, foffset, nullptr, fcount, nullptr); + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, foffset.data(), nullptr, fcount.data(), nullptr); /*------------------------------------------------------------------*/ - /* 4. transpose local slab [X][Y][Z][k] -> [Z][Y][X][k] */ + /* 4. transpose local slab [X][Y][Z][d1][d2]... -> [Z][Y][X][d1][d2]... 
*/ /*------------------------------------------------------------------*/ - const Eigen::Index NxLoc = static_cast(local_n0); - const Eigen::Index NyLoc = static_cast(Ny); - const Eigen::Index NzLoc = static_cast(Nz); - const Eigen::Index kLoc = static_cast(kDim); - const size_t slabElems = static_cast(NxLoc * NyLoc * NzLoc * kLoc); + const Eigen::Index NxLoc = static_cast(local_n0); + const Eigen::Index NyLoc = static_cast(Ny); + const Eigen::Index NzLoc = static_cast(Nz); + + // Flatten extra dimensions into one for Eigen Tensor (which supports up to rank 4) + size_t extra_size = 1; + for (int d : extra_dims) + extra_size *= d; + const Eigen::Index extraLoc = static_cast(extra_size); + + const size_t slabElems = static_cast(NxLoc * NyLoc * NzLoc * extraLoc); T *tmp = static_cast(fftw_malloc(slabElems * sizeof(T))); if (!tmp) { @@ -270,21 +297,21 @@ void Reader::WriteSlab( } Eigen::TensorMap> - input_tensor(data, NxLoc, NyLoc, NzLoc, kLoc); + input_tensor(data, NxLoc, NyLoc, NzLoc, extraLoc); Eigen::TensorMap> - output_tensor(tmp, NzLoc, NyLoc, NxLoc, kLoc); + output_tensor(tmp, NzLoc, NyLoc, NxLoc, extraLoc); - // Permute dimensions: (0,1,2,3) -> (2,1,0,3) swaps X and Z + // Permute dimensions: (0,1,2,3) -> (2,1,0,3) swaps X and Z, keeps extra dims Eigen::array shuffle_dims = {2, 1, 0, 3}; output_tensor = input_tensor.shuffle(shuffle_dims); /*------------------------------------------------------------------*/ /* 5. MEMORY dataspace matches FILE slab exactly */ /*------------------------------------------------------------------*/ - hid_t memspace = H5Screate_simple(4, fcount, nullptr); + hid_t memspace = H5Screate_simple(rank, fcount.data(), nullptr); - plist_id = H5Pcreate(H5P_DATASET_XFER); + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); herr_t status = H5Dwrite(dset_id, data_type, @@ -314,14 +341,14 @@ void Reader::writeData(const char *fieldName, int load_idx, int time_idx, T *dat } template -void Reader::writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, int size) +void Reader::writeSlab(const char *fieldName, int load_idx, int time_idx, T *data, const std::vector &extra_dims) { if (std::find(resultsToWrite.begin(), resultsToWrite.end(), fieldName) == resultsToWrite.end()) { return; } char name[5096]; snprintf(name, sizeof(name), "%s/load%i/time_step%i/%s", dataset_name, load_idx, time_idx, fieldName); - WriteSlab(data, size, name); + WriteSlab(data, extra_dims, name); } #endif diff --git a/include/solver.h b/include/solver.h index e14e33f..074a909 100644 --- a/include/solver.h +++ b/include/solver.h @@ -456,8 +456,37 @@ double Solver::compute_error(RealArray &r) template void Solver::postprocess(Reader &reader, int load_idx, int time_idx) { - VectorXd strain = VectorXd::Zero(local_n0 * n_y * n_z * n_str); - VectorXd stress = VectorXd::Zero(local_n0 * n_y * n_z * n_str); + int n_gp = matmanager->models[0]->n_gp; + + // Check what user requested + auto &results = reader.resultsToWrite; + bool need_stress = std::find(results.begin(), results.end(), "stress") != results.end(); + bool need_stress_gp = std::find(results.begin(), results.end(), "stress_gp") != results.end(); + bool need_strain = std::find(results.begin(), results.end(), "strain") != results.end(); + bool need_strain_gp = std::find(results.begin(), results.end(), "strain_gp") != results.end(); + + bool need_global_avg = std::find(results.begin(), results.end(), "stress_average") != results.end() || + std::find(results.begin(), results.end(), 
"strain_average") != results.end(); + + bool need_phase_avg = false; + for (int mat_idx = 0; mat_idx < reader.n_mat; ++mat_idx) { + char name[512]; + sprintf(name, "phase_stress_average_phase%d", mat_idx); + if (std::find(results.begin(), results.end(), name) != results.end()) { + need_phase_avg = true; + break; + } + } + + // Determine if we need to compute stress/strain at all + bool need_compute = need_stress || need_stress_gp || need_strain || need_strain_gp || need_global_avg || need_phase_avg; + + // Conditional allocation + VectorXd *strain_elem = need_strain ? new VectorXd(local_n0 * n_y * n_z * n_str) : nullptr; + VectorXd *stress_elem = need_stress ? new VectorXd(local_n0 * n_y * n_z * n_str) : nullptr; + VectorXd *strain_gp = need_strain_gp ? new VectorXd(local_n0 * n_y * n_z * n_gp * n_str) : nullptr; + VectorXd *stress_gp = need_stress_gp ? new VectorXd(local_n0 * n_y * n_z * n_gp * n_str) : nullptr; + VectorXd stress_average = VectorXd::Zero(n_str); VectorXd strain_average = VectorXd::Zero(n_str); @@ -472,23 +501,61 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ Matrix ue; int phase_id; - iterateCubes<0>([&](ptrdiff_t *idx, ptrdiff_t *idxPadding) { - for (int i = 0; i < 8; ++i) { - for (int j = 0; j < howmany; ++j) { - ue(howmany * i + j, 0) = v_u[howmany * idx[i] + j]; + + if (need_compute) { + iterateCubes<0>([&](ptrdiff_t *idx, ptrdiff_t *idxPadding) { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < howmany; ++j) { + ue(howmany * i + j, 0) = v_u[howmany * idx[i] + j]; + } } - } - phase_id = ms[idx[0]]; + phase_id = ms[idx[0]]; - const MaterialInfo &info = matmanager->get_info(phase_id); - info.model->getStrainStress(strain.segment(n_str * idx[0], n_str).data(), stress.segment(n_str * idx[0], n_str).data(), ue, info.local_mat_id, idx[0]); - stress_average += stress.segment(n_str * idx[0], n_str); - strain_average += strain.segment(n_str * idx[0], n_str); + const MaterialInfo &info = matmanager->get_info(phase_id); - phase_stress_average[phase_id] += stress.segment(n_str * idx[0], n_str); - phase_strain_average[phase_id] += strain.segment(n_str * idx[0], n_str); - phase_counts[phase_id]++; - }); + // Temporary storage for element averages + double elem_strain_avg[n_str]; + double elem_stress_avg[n_str]; + + // Compute once - populates internal eps/sigma + info.model->getStrainStress(elem_strain_avg, elem_stress_avg, ue, info.local_mat_id, idx[0]); + + // Store element averages if requested + if (need_stress) { + for (int c = 0; c < n_str; ++c) { + (*stress_elem)(idx[0] * n_str + c) = elem_stress_avg[c]; + } + } + if (need_strain) { + for (int c = 0; c < n_str; ++c) { + (*strain_elem)(idx[0] * n_str + c) = elem_strain_avg[c]; + } + } + + // Copy all GP data if requested + if (need_stress_gp) { + memcpy(&(*stress_gp)(idx[0] * n_gp * n_str), + info.model->get_sigma_data(), + n_gp * n_str * sizeof(double)); + } + if (need_strain_gp) { + memcpy(&(*strain_gp)(idx[0] * n_gp * n_str), + info.model->get_eps_data(), + n_gp * n_str * sizeof(double)); + } + + // Accumulate for global and phase averages + if (need_global_avg || need_phase_avg) { + for (int c = 0; c < n_str; ++c) { + stress_average(c) += elem_stress_avg[c]; + strain_average(c) += elem_strain_avg[c]; + phase_stress_average[phase_id](c) += elem_stress_avg[c]; + phase_strain_average[phase_id](c) += elem_strain_avg[c]; + } + phase_counts[phase_id]++; + } + }); + } MPI_Allreduce(MPI_IN_PLACE, stress_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); MPI_Allreduce(MPI_IN_PLACE, 
strain_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); @@ -599,16 +666,33 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ reader.writeData("absolute_error", load_idx, time_idx, err_all.data(), dims, 1); vector rank_field(local_n0 * n_y * n_z, world_rank); - reader.writeSlab("mpi_rank", load_idx, time_idx, rank_field.data(), 1); - reader.writeSlab("microstructure", load_idx, time_idx, ms, 1); - reader.writeSlab("displacement_fluctuation", load_idx, time_idx, v_u, howmany); - reader.writeSlab("displacement", load_idx, time_idx, u_total.data(), howmany); - reader.writeSlab("residual", load_idx, time_idx, v_r, howmany); - reader.writeSlab("strain", load_idx, time_idx, strain.data(), n_str); - reader.writeSlab("stress", load_idx, time_idx, stress.data(), n_str); + reader.writeSlab("mpi_rank", load_idx, time_idx, rank_field.data(), {1}); + reader.writeSlab("microstructure", load_idx, time_idx, ms, {1}); + reader.writeSlab("displacement_fluctuation", load_idx, time_idx, v_u, {howmany}); + reader.writeSlab("displacement", load_idx, time_idx, u_total.data(), {howmany}); + reader.writeSlab("residual", load_idx, time_idx, v_r, {howmany}); + + if (need_strain) + reader.writeSlab("strain", load_idx, time_idx, strain_elem->data(), {n_str}); + if (need_stress) + reader.writeSlab("stress", load_idx, time_idx, stress_elem->data(), {n_str}); + if (need_strain_gp) + reader.writeSlab("strain_gp", load_idx, time_idx, strain_gp->data(), {n_gp, n_str}); + if (need_stress_gp) + reader.writeSlab("stress_gp", load_idx, time_idx, stress_gp->data(), {n_gp, n_str}); matmanager->postprocess(*this, reader, load_idx, time_idx); + // Cleanup + if (strain_elem) + delete strain_elem; + if (stress_elem) + delete stress_elem; + if (strain_gp) + delete strain_gp; + if (stress_gp) + delete stress_gp; + // Compute homogenized tangent only if requested if (find(reader.resultsToWrite.begin(), reader.resultsToWrite.end(), "homogenized_tangent") != reader.resultsToWrite.end()) { homogenized_tangent = get_homogenized_tangent(1e-6); From 633fda7e6d5b78feb25b758e629de9305646d5b6 Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Fri, 2 Jan 2026 18:01:04 +0100 Subject: [PATCH 08/14] remove debug msg --- pyfans/micro.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp index 9658119..5283570 100644 --- a/pyfans/micro.cpp +++ b/pyfans/micro.cpp @@ -36,11 +36,7 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file) reader.force_single_rank = true; reader.communicator = MPI_COMM_SELF; - int rank = -1; - MPI_Comm_rank(reader.communicator, &rank); - reader.ReadMS(3); - printf(">>read input%d id %d\n", rank, sim_id); if (reader.strain_type == "small") { matmanager = createMaterialManager<3, 6>(reader); @@ -50,7 +46,6 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file) matmanager = createMaterialManager<3, 9>(reader); solver = createSolver<3, 9>(reader, std::get*>(matmanager)); } - printf(">>constructed %d id %d\n", rank, sim_id); } py::dict MicroSimulation::solve(py::dict macro_data, double dt) From 9fedc080e8a3e76de121e15d27ee9903bffeda2f Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Fri, 2 Jan 2026 18:03:43 +0100 Subject: [PATCH 09/14] update lock file --- pixi.lock | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pixi.lock b/pixi.lock index 0679bc1..1c1db95 100644 --- a/pixi.lock +++ b/pixi.lock @@ -6139,6 +6139,9 @@ packages: version: 0.6.1 build: h1235946_0 subdir: linux-64 + variants: + cxx_compiler: clangxx + target_platform: linux-64 
   depends:
   - libstdcxx >=15
   - libgcc >=15
@@ -6150,6 +6153,9 @@
   version: 0.6.1
   build: hcb8d3e5_0
   subdir: osx-arm64
+  variants:
+    cxx_compiler: clangxx
+    target_platform: osx-arm64
   depends:
   - libcxx >=21
   - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_*

From 12df07a009a52c50013e06e7789ea2d0b3f45102 Mon Sep 17 00:00:00 2001
From: Ishaan Desai
Date: Fri, 2 Jan 2026 20:22:37 +0100
Subject: [PATCH 10/14] Formatting

---
 include/reader.h |  4 ++--
 include/solver.h |  2 +-
 pyfans/micro.cpp | 26 ++++++++++++--------------
 pyfans/micro.hpp | 10 +++++-----
 src/reader.cpp   |  7 +++----
 5 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/include/reader.h b/include/reader.h
index 8b65c7f..ef6a87a 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -22,8 +22,8 @@ class Reader {
     ~Reader();
 
     // MPI controls
-    bool force_single_rank;
-    MPI_Comm communicator;
+    bool     force_single_rank;
+    MPI_Comm communicator;
 
     // contents of input file:
diff --git a/include/solver.h b/include/solver.h
index 074a909..36d36fb 100644
--- a/include/solver.h
+++ b/include/solver.h
@@ -18,7 +18,7 @@ class Solver : private MixedBCController {
 
     const int world_rank;
     const int world_size;
-    MPI_Comm communicator;
+    MPI_Comm  communicator;
 
     const ptrdiff_t n_x, n_y, n_z;
 
diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp
index 5283570..9eb6861 100644
--- a/pyfans/micro.cpp
+++ b/pyfans/micro.cpp
@@ -34,23 +34,22 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file)
     reader.ReadInputFile(input_file);
 
     reader.force_single_rank = true;
-    reader.communicator = MPI_COMM_SELF;
+    reader.communicator      = MPI_COMM_SELF;
 
     reader.ReadMS(3);
 
     if (reader.strain_type == "small") {
         matmanager = createMaterialManager<3, 6>(reader);
-        solver = createSolver<3, 6>(reader, std::get<MaterialManager<3, 6>*>(matmanager));
-    }
-    else {
+        solver     = createSolver<3, 6>(reader, std::get<MaterialManager<3, 6> *>(matmanager));
+    } else {
         matmanager = createMaterialManager<3, 9>(reader);
-        solver = createSolver<3, 9>(reader, std::get<MaterialManager<3, 9>*>(matmanager));
+        solver     = createSolver<3, 9>(reader, std::get<MaterialManager<3, 9> *>(matmanager));
     }
 }
 
 py::dict MicroSimulation::solve(py::dict macro_data, double dt)
 {
-    const bool is_small_strain = std::holds_alternative<MaterialManager<3, 6>*>(matmanager);
+    const bool is_small_strain = std::holds_alternative<MaterialManager<3, 6> *>(matmanager);
 
     // Time step value dt is not used currently, but is available for future use
 
     // Create a pybind style Numpy array from macro_write_data["micro_vector_data"], which is a Numpy array
     py::array_t<double> strain1 = macro_data["strains1to3"].cast<py::array_t<double>>();
     py::array_t<double> strain2 = macro_data["strains4to6"].cast<py::array_t<double>>();
-    py::array_t<double> strain = merge_arrays(strain1, strain2);
+    py::array_t<double> strain  = merge_arrays(strain1, strain2);
     if (not is_small_strain) {
         py::array_t<double> strain3 = macro_data["strains7to9"].cast<py::array_t<double>>();
-        strain = merge_arrays(strain, strain3);
+        strain                      = merge_arrays(strain, strain3);
     }
 
-    std::vector<double> g0 = std::vector<double>(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector.
+    std::vector<double> g0 = std::vector<double>(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector.
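
An aside on the pattern used from here on: every call into the material
manager and solver is routed through std::visit, which applies one generic
lambda to whichever alternative the variant currently holds, so the <3, 6>
and <3, 9> instantiations share a single code path. A self-contained
illustration with hypothetical stand-in types (not FANS code):

    #include <iostream>
    #include <variant>

    struct SolverA { double solve() { return 6.0; } };
    struct SolverB { double solve() { return 9.0; } };

    int main()
    {
        // Variant over raw pointers, mirroring solver_t in pyfans/micro.hpp.
        SolverA a;
        std::variant<SolverA *, SolverB *> solver = &a;
        // The generic lambda is instantiated once per alternative; an explicit
        // return type helps when the alternatives' results must unify.
        double r = std::visit([](auto *s) -> double { return s->solve(); }, solver);
        std::cout << r << '\n'; // prints 6
        return 0;
    }
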
     VectorXd homogenized_stress;
 
-    std::visit([&](auto& mm){mm->set_gradient(g0);}, matmanager);
+    std::visit([&](auto &mm) { mm->set_gradient(g0); }, matmanager);
 
-    std::visit([](auto& s){s->solve();}, solver);
+    std::visit([](auto &s) { s->solve(); }, solver);
 
-    homogenized_stress = std::visit([](auto& s) -> VectorXd { return s->get_homogenized_stress();}, solver);
+    homogenized_stress = std::visit([](auto &s) -> VectorXd { return s->get_homogenized_stress(); }, solver);
 
-    auto C = std::visit([&](auto& s) -> MatrixXd {return s->get_homogenized_tangent(pert_param);}, solver);
+    auto C = std::visit([&](auto &s) -> MatrixXd { return s->get_homogenized_tangent(pert_param); }, solver);
 
     // Convert data to a py::dict again to send it back to the Micro Manager
     py::dict micro_write_data;
@@ -98,8 +97,7 @@ py::dict MicroSimulation::solve(py::dict macro_data, double dt)
         micro_write_data["cmat6"] = C_6;
         std::vector<double> C_7 = {C(4, 4), C(4, 5), C(5, 5)};
         micro_write_data["cmat7"] = C_7;
-    }
-    else {
+    } else {
         std::vector<double> stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]};
         micro_write_data["stresses1to3"] = stress13;
         std::vector<double> stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]};
         micro_write_data["stresses4to6"] = stress46;

diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp
index 7eafc2a..1e6ee55 100644
--- a/pyfans/micro.hpp
+++ b/pyfans/micro.hpp
@@ -27,9 +27,9 @@ class MicroSimulation {
     int _sim_id;
     Reader reader;
     // Hardcoding mechanical models because these definitions need information from the input file.
-    using matmanager_t = std::variant<MaterialManager<3, 6>*, MaterialManager<3, 9>*>;
-    using solver_t = std::variant<Solver<3, 6>*, Solver<3, 9>*>;
-    matmanager_t matmanager;
-    solver_t solver;
-    double pert_param = 1e-6; // scalar strain perturbation parameter
+    using matmanager_t = std::variant<MaterialManager<3, 6> *, MaterialManager<3, 9> *>;
+    using solver_t = std::variant<Solver<3, 6> *, Solver<3, 9> *>;
+    matmanager_t matmanager;
+    solver_t solver;
+    double pert_param = 1e-6; // scalar strain perturbation parameter
 };

diff --git a/src/reader.cpp b/src/reader.cpp
index 542127e..7894991 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -70,11 +70,10 @@ void Reader ::ReadInputFile(char input_fn[])
 
         if (j.contains("no_mpi")) {
             force_single_rank = true;
-            communicator = MPI_COMM_SELF;
-        }
-        else {
+            communicator = MPI_COMM_SELF;
+        } else {
             force_single_rank = false;
-            communicator = MPI_COMM_WORLD;
+            communicator = MPI_COMM_WORLD;
         }
 
         MPI_Comm_rank(communicator, &world_rank);

From ca926d0766d39ce78b1329d821bad5c9df530cda Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Sat, 3 Jan 2026 16:34:06 +0100
Subject: [PATCH 11/14] added logging and comp time log enable

---
 CMakeLists.txt            |   5 ++
 include/MaterialManager.h |  79 ++++++++++----------
 include/general.h         |   2 -
 include/logging.h         |  78 ++++++++++++++++++++
 include/solver.h          |  61 +++++++---------
 include/solverCG.h        |  13 ++--
 include/solverFP.h        |   7 +-
 pyfans/micro.cpp          |   5 ++
 pyfans/micro.hpp          |   1 +
 src/logging.cpp           | 147 ++++++++++++++++++++++++++++++++++++++
 src/main.cpp              |  20 +++---
 src/reader.cpp            |  74 +++++++++----------
 12 files changed, 350 insertions(+), 142 deletions(-)
 create mode 100755 include/logging.h
 create mode 100755 src/logging.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85a7736..efb3c1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,6 +112,9 @@ find_package(nlohmann_json REQUIRED)
 # TARGETS #
 ##############################################################################
 
+set(FANS_VERBOSITY 5 CACHE STRING "Set verbosity level: 0 none, 5 all")
+add_definitions(-DVERBOSITY=${FANS_VERBOSITY}) + option(FANS_BUILD_STATIC "Build static library" OFF) if (FANS_BUILD_STATIC) add_library(FANS_FANS STATIC) @@ -150,6 +153,7 @@ target_include_directories(FANS_FANS PUBLIC $ @@ -110,51 +111,47 @@ class MaterialManager { compute_reference_stiffness(reader); // Print detailed information about material configuration for logging - if (reader.world_rank == 0) { - printf("\n# MaterialManager initialized:\n"); - printf("# Number of material models: %zu\n", models.size()); - printf("# Number of phases: %d\n#\n", n_phases); - - for (size_t i = 0; i < mats.size(); ++i) { - const auto &mg = mats[i]; - printf("# Material Model %zu: %s\n", i + 1, mg["matmodel"].get().c_str()); - - // Print phases - auto phases = mg["phases"].get>(); - printf("# Phases: ["); - for (size_t j = 0; j < phases.size(); ++j) { - printf("%d", phases[j]); - if (j < phases.size() - 1) - printf(", "); - } - printf("]\n"); - - // Print material properties - printf("# Material properties:\n"); - const auto &props = mg["material_properties"]; - for (auto it = props.begin(); it != props.end(); ++it) { - printf("# %s: ", it.key().c_str()); - if (it.value().is_array()) { - printf("["); - for (size_t k = 0; k < it.value().size(); ++k) { - if (it.value()[k].is_number()) { - printf("%.5g", it.value()[k].get()); - } else if (it.value()[k].is_string()) { - printf("\"%s\"", it.value()[k].get().c_str()); - } - if (k < it.value().size() - 1) - printf(", "); + Log::io->info() << "\n# MaterialManager initialized:\n"; + Log::io->info() << "# Number of material models: " << models.size() << "\n"; + Log::io->info() << "# Number of phases: " << n_phases << "\n#\n"; + + for (size_t i = 0; i < mats.size(); ++i) { + const auto &mg = mats[i]; + Log::io->info() << "# Material Model " << i + 1 << ": " << mg["matmodel"].get() << "\n"; + + // Print phases + auto phases = mg["phases"].get>(); + Log::io->info() << "# Phases: ["; + for (size_t j = 0; j < phases.size(); ++j) { + Log::io->info(true) << phases[j]; + if (j < phases.size() - 1) Log::io->info(true) << ", "; + } + Log::io->info(true) << "]\n"; + + // Print material properties + Log::io->info() << "# Material properties:\n"; + const auto &props = mg["material_properties"]; + for (auto it = props.begin(); it != props.end(); ++it) { + Log::io->info() << "# " << it.key() << ": "; + if (it.value().is_array()) { + Log::io->info(true) << "["; + for (size_t k = 0; k < it.value().size(); ++k) { + if (it.value()[k].is_number()) { + Log::io->info(true) << std::setprecision(5) << it.value()[k].get() << std::defaultfloat; + } else if (it.value()[k].is_string()) { + Log::io->info(true) << "\"" << it.value()[k].get() << "\""; } - printf("]"); - } else if (it.value().is_number()) { - printf("%.5g", it.value().get()); - } else if (it.value().is_string()) { - printf("\"%s\"", it.value().get().c_str()); + if (k < it.value().size() - 1) Log::io->info(true) << ", "; } - printf("\n"); + Log::io->info(true) << "]"; + } else if (it.value().is_number()) { + Log::io->info(true) << std::setprecision(5) << it.value().get() << std::defaultfloat; + } else if (it.value().is_string()) { + Log::io->info(true) << "\"" << it.value().get() << "\""; } - printf("#\n"); + Log::io->info(true) << "\n"; } + Log::io->info() << "#\n"; } } diff --git a/include/general.h b/include/general.h index 1128e8a..1949615 100644 --- a/include/general.h +++ b/include/general.h @@ -58,6 +58,4 @@ inline void FANS_free(V *p) } #endif // FANS_MALLOC_H -#define VERBOSITY 0 - // #define EIGEN_RUNTIME_NO_MALLOC 
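Aside (editorial): before the new logging header below, a usage sketch of the API it declares — my reading of the header, not code from this series. At this point in the series `Log::init` takes only rank and size; patch 14 later adds a communicator argument.

```cpp
#include "logging.h"
#include "mpi.h"

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    Log::init(rank, size);                  // creates Log::general / Log::solver / Log::io
    Log::io->info() << "# reading input\n"; // prefixed and timestamped, active rank only
    Log::io->info() << "# phases: [";
    Log::io->info(true) << 1 << ", " << 2;  // append=true continues the same line
    Log::io->info(true) << "]\n";
    Log::finalize();                        // release loggers before MPI_Finalize

    MPI_Finalize();
    return 0;
}
```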
diff --git a/include/logging.h b/include/logging.h new file mode 100755 index 0000000..871ed0a --- /dev/null +++ b/include/logging.h @@ -0,0 +1,78 @@ +#ifndef LOGGING_H +#define LOGGING_H + +#include +#include +#include +#include + +namespace Log { + +enum Level { + Error, + Info, + Warn, + Debug, + All +}; + +[[maybe_unused]] const std::string level_to_string(Level level); + +extern Level active_level; + +class Logger { + public: + explicit Logger(std::string prefix, int comm_rank, int comm_size); + + /// error > info > warn > debug > trace + std::ostream& error(bool append=false); + /// error > info > warn > debug > trace + std::ostream& info(bool append=false); + /// error > info > warn > debug > trace + std::ostream& warn(bool append=false); + /// error > info > warn > debug > trace + std::ostream& debug(bool append=false); + /// error > info > warn > debug > trace + std::ostream& trace(bool append=false); + /// progress bar + void progress(const std::string& prefix, int step, int max); + + private: + /// starting time + std::chrono::steady_clock::time_point _start_time; + /// what the logger should always print first + const std::string _prefix; + /// empty stream to write nothing + std::ofstream _nullstr; + /// MPI comm rank + int _comm_rank; + /// MPI comm size + int _comm_size; + +}; + +/** + * Creates all loggers and sets the level + * */ +void init(int comm_rank, int comm_size); + +/** + * Frees all memory + * */ +void finalize(); + +/** + * Set activate rank + */ +[[maybe_unused]] void setActiveRank(int rank); + +/// logger with prefix [GENERAL] +extern std::unique_ptr general; +/// logger with prefix [SOLVER] +extern std::unique_ptr solver; +/// logger with prefix [IO] +extern std::unique_ptr io; + +}; // namespace Log + +#endif \ No newline at end of file diff --git a/include/solver.h b/include/solver.h index cb05d49..8cc9740 100644 --- a/include/solver.h +++ b/include/solver.h @@ -3,6 +3,7 @@ #include "matmodel.h" #include "MaterialManager.h" +#include "logging.h" class J2Plasticity; @@ -144,9 +145,8 @@ Solver::Solver(Reader &reader, MaterialManager * template void Solver::computeFundamentalSolution() { - if (world_rank == 0) { - printf("\n# Start creating Fundamental Solution(s) \n"); - } + Log::solver->info() << "\n# Start creating Fundamental Solution(s) \n"; + clock_t tot_time = clock(); Matrix Ker0 = matmanager->models[0]->Compute_Reference_ElementStiffness(matmanager->kapparef_mat); @@ -198,9 +198,7 @@ void Solver::computeFundamentalSolution() fundamentalSolution /= (double) (n_x * n_y * n_z); tot_time = clock() - tot_time; - if (world_rank == 0) { - printf("# Complete; Time for construction of Fundamental Solution(s): %f seconds\n", double(tot_time) / CLOCKS_PER_SEC); - } + Log::solver->info() << "# Complete; Time for construction of Fundamental Solution(s): " << static_cast(tot_time) / CLOCKS_PER_SEC << " seconds\n"; } template @@ -282,20 +280,18 @@ void Solver::compute_residual(RealArray &r_matrix, RealArray &u_ template void Solver::solve() { - err_all = ArrayXd::Zero(n_it + 1); fft_time = 0.0; clock_t tot_time = clock(); internalSolve(); tot_time = clock() - tot_time; - // if( VERBOSITY > 5 ){ - if (world_rank == 0) { - printf("# FFT Time per iteration ....... %2.6f sec\n", double(fft_time) / CLOCKS_PER_SEC / iter); - printf("# Total FFT Time ............... %2.6f sec\n", double(fft_time) / CLOCKS_PER_SEC); - printf("# Total Time per iteration ..... %2.6f sec\n", double(tot_time) / CLOCKS_PER_SEC / iter); - printf("# Total Time ................... 
%2.6f sec\n", double(tot_time) / CLOCKS_PER_SEC); - printf("# FFT contribution to total time %2.6f %% \n", 100. * double(fft_time) / double(tot_time)); - } + + Log::solver->info() << "# FFT Time per iteration ....... " << std::setw(2) << std::setprecision(6) << double(fft_time) / CLOCKS_PER_SEC / iter << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total FFT Time ............... " << std::setw(2) << std::setprecision(6) << double(fft_time) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total Time per iteration ..... " << std::setw(2) << std::setprecision(6) << double(tot_time) / CLOCKS_PER_SEC / iter << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total Time ................... " << std::setw(2) << std::setprecision(6) << double(tot_time) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; + Log::solver->info() << "# FFT contribution to total time " << std::setw(2) << std::setprecision(6) << 100. * double(fft_time) / double(tot_time) << std::defaultfloat << " % \n"; + matmanager->update_internal_variables(); } @@ -433,13 +429,12 @@ double Solver::compute_error(RealArray &r) double err0 = err_all[0]; double err_rel = (iter == 0 ? 100 : err / err0); - if (world_rank == 0) { - if (iter == 0) { - printf("Before 1st iteration: %16.8e\n", err0); - } else { - printf("it %3lu .... err %16.8e / %8.4e, ratio: %4.8e, FFT time: %2.6f sec\n", iter, err, err / err0, (iter == 1 ? 0.0 : err / err_all[iter - 1]), double(buftime) / CLOCKS_PER_SEC); - } - } + if (iter == 0) Log::solver->info() << "Before 1st iteration: " << std::setw(16) << std::setprecision(8) << err0 << std::defaultfloat << "\n"; + else Log::solver->info() << "it " << iter << " .... " + << "err " << std::setw(16) << std::setprecision(8) << err << std::defaultfloat << " / " + << std::setw( 8) << std::setprecision(4) << err / err0 << std::defaultfloat + << ", ratio: " << std::setw(4) << std::setprecision(8) << (iter == 1 ? 0.0 : err / err_all[iter - 1]) << std::defaultfloat + << ", FFT time: " << std::setw(2) << std::setprecision(6) << static_cast(buftime) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; const std::string &error_type = reader.errorParameters["type"].get(); if (error_type == "absolute") { @@ -573,16 +568,12 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ } } - if (world_rank == 0) { - printf("# Effective Stress .. ("); - for (int i = 0; i < n_str; ++i) - printf("%+.12f ", stress_average[i]); - printf(") \n"); - printf("# Effective Strain .. ("); - for (int i = 0; i < n_str; ++i) - printf("%+.12f ", strain_average[i]); - printf(") \n\n"); - } + Log::solver->info() << "# Effective Stress .. ("; + for (int i = 0; i < n_str; ++i) Log::solver->info(true) << std::showpos << std::fixed << std::setprecision(12) << stress_average[i] << " " << std::noshowpos << std::defaultfloat; + Log::solver->info(true) << ") \n"; + Log::solver->info() << "# Effective Strain .. 
("; + for (int i = 0; i < n_str; ++i) Log::solver->info(true) << std::showpos << std::fixed << std::setprecision(12) << strain_average[i] << " " << std::noshowpos << std::defaultfloat; + Log::solver->info(true) << ") \n\n"; /* ====================================================================== * * u_total = g0·X + ũ (vector or scalar, decided at compile time) @@ -695,11 +686,7 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ if (find(reader.resultsToWrite.begin(), reader.resultsToWrite.end(), "homogenized_tangent") != reader.resultsToWrite.end()) { homogenized_tangent = get_homogenized_tangent(1e-6); hsize_t dims[2] = {static_cast(n_str), static_cast(n_str)}; - if (world_rank == 0) { - cout << "# Homogenized tangent: " << endl - << setprecision(12) << homogenized_tangent << endl - << endl; - } + Log::solver->info() << "# Homogenized tangent: \n" << std::setprecision(12) << homogenized_tangent << std::defaultfloat << "\n\n"; reader.writeData("homogenized_tangent", load_idx, time_idx, homogenized_tangent.data(), dims, 2); } extrapolateDisplacement(); // prepare v_u for next time step diff --git a/include/solverCG.h b/include/solverCG.h index b02a428..513a34b 100644 --- a/include/solverCG.h +++ b/include/solverCG.h @@ -60,8 +60,7 @@ double SolverCG::dotProduct(RealArray &a, RealArray &b) template void SolverCG::internalSolve() { - if (this->world_rank == 0) - printf("\n# Start FANS - Conjugate Gradient Solver \n"); + Log::solver->info() << "\n# Start FANS - Conjugate Gradient Solver \n"; bool islinear = this->matmanager->all_linear; @@ -110,8 +109,8 @@ void SolverCG::internalSolve() iter++; err_rel = this->compute_error(v_r_real); } - if (this->world_rank == 0) - printf("# Complete FANS - Conjugate Gradient Solver \n"); + + Log::solver->info() << "# Complete FANS - Conjugate Gradient Solver \n"; } template @@ -142,8 +141,10 @@ void SolverCG::LineSearchSecant() } v_u_real += d_real * (alpha_new - alpha_old); v_r_real = rnew_real; - if (this->world_rank == 0) - printf("line search iter %i, alpha %f - error %e - ", _iter, alpha_new, err); + + Log::solver->info() << "line search iter " << _iter + << ", alpha " << std::fixed << std::setprecision(6) << alpha_new << std::defaultfloat + << " - error " << std::fixed << std::setprecision(6) << err << std::defaultfloat << " - "; } template diff --git a/include/solverFP.h b/include/solverFP.h index 67b99a4..6a00691 100644 --- a/include/solverFP.h +++ b/include/solverFP.h @@ -32,8 +32,7 @@ SolverFP::SolverFP(Reader &reader, MaterialManager void SolverFP::internalSolve() { - if (this->world_rank == 0) - printf("\n# Start FANS - Fixed Point Solver \n"); + Log::solver->info() << "\n# Start FANS - Fixed Point Solver \n"; this->template compute_residual<2>(v_r_real, v_u_real); @@ -50,7 +49,7 @@ void SolverFP::internalSolve() iter++; err_rel = this->compute_error(v_r_real); } - if (this->world_rank == 0) - printf("# Complete FANS - Fixed Point Solver \n"); + + Log::solver->info() << "# Complete FANS - Fixed Point Solver \n"; } #endif diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp index b9f3636..c195ab1 100644 --- a/pyfans/micro.cpp +++ b/pyfans/micro.cpp @@ -37,6 +37,11 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file) solver = createSolver<3, 6>(reader, matmanager); } +MicroSimulation::~MicroSimulation() +{ + Log::finalize(); +} + py::dict MicroSimulation::solve(py::dict macro_data, double dt) { // Time step value dt is not used currently, but is available for future use diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp 
index b6ebc6c..a8c5bb2 100644 --- a/pyfans/micro.hpp +++ b/pyfans/micro.hpp @@ -20,6 +20,7 @@ namespace py = pybind11; class MicroSimulation { public: MicroSimulation(int sim_id, char *input_file = "input.json"); + ~MicroSimulation(); py::dict solve(py::dict macro_write_data, double dt); private: diff --git a/src/logging.cpp b/src/logging.cpp new file mode 100755 index 0000000..9a8a354 --- /dev/null +++ b/src/logging.cpp @@ -0,0 +1,147 @@ +#include "logging.h" + +#include + +#include "mpi.h" + +int active_rank = 0; + +Log::Logger::Logger(std::string prefix, int comm_rank, int comm_size) : _prefix(std::move(prefix)), _nullstr(), _comm_rank(comm_rank), _comm_size(comm_size) { + _start_time = std::chrono::steady_clock::now(); + _nullstr.setstate(std::ios_base::badbit); +} + +std::ostream &Log::Logger::error(bool append) { + if (active_level >= Error && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::info(bool append) { + if (active_level >= Info && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::warn(bool append) { + if (active_level >= Warn && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::debug(bool append) { + if (active_level >= Debug && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::trace(bool append) { + if (active_level >= All && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} +void Log::Logger::progress(const std::string &prefix, int step, int max) { + if (_comm_rank != active_rank) return; + + if (step == 0) std::cout << _prefix << prefix; + + int digits = 1; + int div = 10; + while ((max / div) > 0) { + digits++; + div *= 10; + } + if (step > 0) for (int i = 0; i < digits; i++) std::cout << '\b'; + + int step_digits = 1; + div = 10; + while ((step / div) > 0) { + step_digits++; + div *= 10; + } + + for (int i = 0; i < digits - step_digits; i++) std::cout << ' '; + std::cout << step; + + if (step >= max-1) { + for (int i = 0; i < digits; i++) + std::cout << '\b'; + for (int i = 0; i < prefix.size(); i++) + std::cout << '\b'; + for (int i = 0; i < _prefix.size(); i++) + std::cout << '\b'; + } + //if (step >= max) std::cout << '\n'; + std::cout.flush(); +} 
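Aside (editorial): the Logger constructor above puts its fallback `std::ofstream` into the badbit state, which is the standard trick for a zero-cost null sink — inserts into a failed stream are discarded without opening a file or allocating. A standalone demonstration:

```cpp
#include <fstream>
#include <iostream>

int main() {
    std::ofstream null_stream;                    // never opened
    null_stream.setstate(std::ios_base::badbit);  // all inserts now fail fast
    null_stream << "this text goes nowhere\n";    // silently discarded
    std::cout << "discarding? " << !null_stream.good() << "\n"; // prints 1
    return 0;
}
```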
+ +void Log::init(int comm_rank, int comm_size) { + Level level = Log::Error; + if constexpr (VERBOSITY <= 0) level = Error; + if constexpr (VERBOSITY == 1) level = Info; + if constexpr (VERBOSITY == 2) level = Warn; + if constexpr (VERBOSITY == 3) level = Debug; + if constexpr (VERBOSITY >= 4) level = All; + + active_level = level; + general = std::make_unique("[FANS-GENERAL] ", comm_rank, comm_size); + solver = std::make_unique("[FANS-SOLVER] ", comm_rank, comm_size); + io = std::make_unique("[FANS-IO] ", comm_rank, comm_size); +} + +void Log::finalize() { + general = nullptr; + solver = nullptr; + io = nullptr; +} + +Log::Level Log::active_level = Info; +std::unique_ptr Log::general = nullptr; +std::unique_ptr Log::solver = nullptr; +std::unique_ptr Log::io = nullptr; + +const std::string Log::level_to_string(Log::Level level) { + switch (level) { + case Error: + return "Error"; + case Info: + return "Info"; + case Warn: + return "Warn"; + case Debug: + return "Debug"; + case All: + return "All"; + } + return ""; +} + +void Log::setActiveRank(int rank) { + active_rank = rank; +} + diff --git a/src/main.cpp b/src/main.cpp index 6c69bf6..bcdce0b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include "solver.h" // Version +#include "logging.h" #include "version.h" template @@ -15,17 +16,13 @@ void runSolver(Reader &reader) MaterialManager *matmanager = createMaterialManager(reader); Solver *solver = createSolver(reader, matmanager); - if (reader.world_rank == 0) { - printf("\n╔════════════════════════════════════════════════════════════ Load case %zu/%zu: %zu time steps ════════════════════════════════════════════════════════════╗\n", - load_path_idx + 1, reader.load_cases.size(), reader.load_cases[load_path_idx].n_steps); - } + Log::general->info() << "\n╔════════════════════════════════════════════════════════════ Load case " + << load_path_idx + 1 << "/" << reader.load_cases.size() << ": " << reader.load_cases[load_path_idx].n_steps + << " time steps ════════════════════════════════════════════════════════════╗\n"; for (size_t time_step_idx = 0; time_step_idx < reader.load_cases[load_path_idx].n_steps; ++time_step_idx) { - if (reader.world_rank == 0) { - printf("║ ▶ Time step %zu/%zu (load case %zu/%zu) ◀ \n", - time_step_idx + 1, reader.load_cases[load_path_idx].n_steps, - load_path_idx + 1, reader.load_cases.size()); - } + Log::general->info() << "║ ▶ Time step " << time_step_idx + 1 << "/" << reader.load_cases[load_path_idx].n_steps + << " (load case " << load_path_idx + 1 << "/" << reader.load_cases.size() << ") ◀ \n"; if (reader.load_cases[load_path_idx].mixed) { solver->enableMixedBC(reader.load_cases[load_path_idx].mbc, time_step_idx); } else { @@ -37,9 +34,7 @@ void runSolver(Reader &reader) } delete solver; delete matmanager; - if (reader.world_rank == 0) { - printf("╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n"); - } + Log::general->info() << "╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n"; } } @@ -73,6 +68,7 @@ int main(int argc, char *argv[]) } reader.CloseResultsFile(); + Log::finalize(); MPI_Finalize(); return 0; } diff --git a/src/reader.cpp b/src/reader.cpp index 288d4af..a10984f 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -1,5 +1,6 @@ #include "general.h" #include "reader.h" +#include "logging.h" #include "H5Cpp.h" #include 
"fftw3-mpi.h" @@ -35,10 +36,8 @@ void Reader::ComputeVolumeFractions() // Calculate total number of materials n_mat = global_max - global_min + 1; - if (world_rank == 0) { - printf("# Number of materials: %i (from %u to %u)\n", n_mat, global_min, global_max); - printf("# Volume fractions\n"); - } + Log::io->info() << "# Number of materials: " << n_mat << " (from " << global_min << " to " << global_max << ")\n"; + Log::io->info() << "# Volume fractions\n"; // Using dynamic allocation for arrays since we don't know size at compile time std::vector vol_frac(n_mat, 0); @@ -54,9 +53,7 @@ void Reader::ComputeVolumeFractions() long vf; MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); v_frac[i] = double(vf) / double(dims[0] * dims[1] * dims[2]); - if (world_rank == 0) - printf("# material %4u vol. frac. %10.4f%% \n", - static_cast(i) + global_min, 100. * v_frac[i]); + Log::io->info() << "# material " << static_cast(i) + global_min << " vol. frac. " << std::setw(10) << std::setprecision(4) << 100. * v_frac[i] << std::defaultfloat << "%\n"; } } @@ -67,6 +64,8 @@ void Reader ::ReadInputFile(char input_fn[]) MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); + Log::init(world_rank, world_size); + ifstream i(input_fn); json j; i >> j; @@ -148,22 +147,20 @@ void Reader ::ReadInputFile(char input_fn[]) load_cases.push_back(std::move(lc)); } - if (world_rank == 0) { - printf("# microstructure file name: \t '%s'\n", ms_filename); - printf("# microstructure dataset name: \t '%s'\n", ms_datasetname); - printf("# strain type: \t %s\n", strain_type.c_str()); - printf("# problem type: \t %s\n", problemType.c_str()); - printf("# FE type: \t %s\n", FE_type.c_str()); - printf( - "# FANS error measure: \t %s %s error \n", - errorParameters["type"].get().c_str(), - errorParameters["measure"].get().c_str()); - printf("# FANS Tolerance: \t %10.5e\n", errorParameters["tolerance"].get()); - printf("# Max iterations: \t %6i\n", n_it); - } + Log::io->info() << "# microstructure file name: \t " << ms_filename << "\n"; + Log::io->info() << "# microstructure dataset name: \t " << ms_datasetname <<"\n"; + Log::io->info() << "# strain type: \t " << strain_type << "\n"; + Log::io->info() << "# problem type: \t " << problemType << "\n"; + Log::io->info() << "# FE type: \t " << FE_type << "\n"; + Log::io->info() << "# FANS error measure: \t " << + errorParameters["type"].get() << " " << + errorParameters["measure"].get() << " error \n"; + Log::io->info() << "# FANS Tolerance: \t " << std::setw(10) << std::setprecision(5) << errorParameters["tolerance"].get() << std::defaultfloat << "e\n"; + Log::io->info() << "# Max iterations: \t " << n_it << "\n"; } catch (const std::exception &e) { - fprintf(stderr, "ERROR trying to read input file '%s' for FANS\n", input_fn); + Log::io->error() << "ERROR trying to read input file '" << input_fn << "' for FANS\n"; + Log::finalize(); exit(10); } } @@ -257,13 +254,8 @@ void Reader ::ReadMS(int hm) H5Aclose(attr_id); H5Tclose(attr_type); } - if (world_rank == 0) { - if (is_zyx) { - printf("# Using Z-Y-X dimension ordering for the microstructure data\n"); - } else { - printf("# Using X-Y-Z dimension ordering for the microstructure data\n"); - } - } + if (is_zyx) Log::io->info() << "# Using Z-Y-X dimension ordering for the microstructure data\n"; + else Log::io->info() << "# Using X-Y-Z dimension ordering for the microstructure data\n"; dims.resize(3); if (is_zyx) { /* file layout Z Y X -> logical X Y Z */ @@ -281,18 +273,20 @@ void 
Reader ::ReadMS(int hm) l_e[1] = L[1] / double(dims[1]); l_e[2] = L[2] / double(dims[2]); - if (world_rank == 0) { - printf("# grid size set to [%i x %i x %i] --> %i voxels \nMicrostructure length: [%3.6f x %3.6f x %3.6f]\n", dims[0], dims[1], dims[2], dims[0] * dims[1] * dims[2], L[0], L[1], L[2]); - if (dims[0] % 2 != 0) - fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_x is not a multiple of 2\n"); - if (dims[1] % 2 != 0) - fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_y is not a multiple of 2\n"); - if (dims[2] % 2 != 0) - fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_z is not a multiple of 2\n"); - if (dims[0] / 4 < world_size) - throw std::runtime_error("[ FANS3D_Grid ] ERROR: Please decrease the number of processes or increase the grid size to ensure that each process has at least 4 boxels in the x direction."); - printf("Voxel length: [%1.8f, %1.8f, %1.8f]\n", l_e[0], l_e[1], l_e[2]); - } + Log::io->info() << "# grid size set to [" << dims[0] << " x " << dims[1] << " x " << dims[2] << "] --> " << dims[0] * dims[1] * dims[2] << " voxels \n"; + Log::io->info() << "Microstructure length: [" + << std::setw(3) << std::setprecision(6) << L[0] << std::defaultfloat << " x " + << std::setw(3) << std::setprecision(6) << L[1] << std::defaultfloat << " x " + << std::setw(3) << std::setprecision(6) << L[2] << std::defaultfloat << "]\n"; + if (dims[0] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_x is not a multiple of 2\n"; + if (dims[1] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_y is not a multiple of 2\n"; + if (dims[2] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_z is not a multiple of 2\n"; + if (dims[0] / 4 < world_size) + throw std::runtime_error("[ FANS3D_Grid ] ERROR: Please decrease the number of processes or increase the grid size to ensure that each process has at least 4 boxels in the x direction."); + Log::io->info() << "Voxel length: [" + << std::setw(1) << std::setprecision(8) << l_e[0] << std::defaultfloat << ", " + << std::setw(1) << std::setprecision(8) << l_e[1] << std::defaultfloat << ", " + << std::setw(1) << std::setprecision(8) << l_e[2] << std::defaultfloat << "]\n"; const ptrdiff_t n[3] = {dims[0], dims[1], dims[2] / 2 + 1}; ptrdiff_t block0 = FFTW_MPI_DEFAULT_BLOCK; From a10c32959122a49cd8a41c349840ea5a2caf5b43 Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Sat, 3 Jan 2026 17:20:06 +0100 Subject: [PATCH 12/14] small optim --- include/MaterialManager.h | 2 +- include/matmodel.h | 10 ++++++---- pyfans/micro.cpp | 8 +++++--- pyfans/micro.hpp | 2 ++ 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/MaterialManager.h b/include/MaterialManager.h index e1d7469..6a9b5ef 100644 --- a/include/MaterialManager.h +++ b/include/MaterialManager.h @@ -210,7 +210,7 @@ class MaterialManager { } // Set macroscale loading gradient for all models - void set_gradient(vector g0) + void set_gradient(const vector& g0) { for (auto *model : models) { model->setGradient(g0); diff --git a/include/matmodel.h b/include/matmodel.h index bd3a82d..a43087f 100644 --- a/include/matmodel.h +++ b/include/matmodel.h @@ -27,7 +27,7 @@ class Matmodel { Matrix Compute_Reference_ElementStiffness(const Matrix &kapparef_mat); Matrix &element_residual(Matrix &ue, int mat_index, ptrdiff_t element_idx); void getStrainStress(double *strain, double *stress, Matrix &ue, int mat_index, ptrdiff_t element_idx); - void setGradient(vector _g0); + void setGradient(const vector &_g0); // Accessors for internal Gauss point data (populated after getStrainStress call) inline 
const double *get_eps_data() const
@@ -225,11 +225,13 @@ void Matmodel<howmany, n_str>::getStrainStress(double *strain, double *stress, M
 }
 
 template <int howmany, int n_str>
-void Matmodel<howmany, n_str>::setGradient(vector<double> _g0)
+void Matmodel<howmany, n_str>::setGradient(const vector<double> &_g0)
 {
+    // _g0 is much smaller than g0 and can stay in cache;
+    // with j as the inner loop, cache lines would be used poorly
     macroscale_loading = _g0;
-    for (int i = 0; i < n_str; i++) {
-        for (int j = 0; j < n_gp; ++j) {
+    for (int j = 0; j < n_gp; ++j) {
+        for (int i = 0; i < n_str; i++) {
             g0(n_str * j + i, 0) = _g0[i];
         }
     }

diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp
index c195ab1..eb45ff2 100644
--- a/pyfans/micro.cpp
+++ b/pyfans/micro.cpp
@@ -35,6 +35,8 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file)
     reader.ReadMS(3);
     matmanager = createMaterialManager<3, 6>(reader);
     solver = createSolver<3, 6>(reader, matmanager);
+
+    g0.resize(6, 0);
 }
 
 MicroSimulation::~MicroSimulation()
@@ -51,17 +53,17 @@ py::dict MicroSimulation::solve(py::dict macro_data, double dt)
     py::array_t<double> strain2 = macro_data["strains4to6"].cast<py::array_t<double>>();
     py::array_t<double> strain = merge_arrays(strain1, strain2);
 
-    std::vector<double> g0 = std::vector<double>(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector.
+    std::copy(strain.begin(), strain.end(), g0.begin()); // copy numpy data into the preallocated buffer
 
     VectorXd homogenized_stress;
 
-    matmanager->set_gradient(g0);
+    matmanager->set_gradient(g0); // TODO done
 
     solver->solve();
 
     homogenized_stress = solver->get_homogenized_stress();
 
-    auto C = solver->get_homogenized_tangent(pert_param);
+    MatrixXd C = solver->get_homogenized_tangent(pert_param);
 
     // Convert data to a py::dict again to send it back to the Micro Manager
     py::dict micro_write_data;

diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp
index a8c5bb2..51ce762 100644
--- a/pyfans/micro.hpp
+++ b/pyfans/micro.hpp
@@ -30,4 +30,6 @@ class MicroSimulation {
     MaterialManager<3, 6> *matmanager;
     Solver<3, 6> *solver;
     double pert_param = 1e-6; // scalar strain perturbation parameter
+
+    std::vector<double> g0; // gradient buffer, alloc once
 };

From 4bb23c14dc580ed02a15395f4686f1b5a7d782f1 Mon Sep 17 00:00:00 2001
From: Alex Hocks
Date: Sat, 3 Jan 2026 17:43:11 +0100
Subject: [PATCH 13/14] fix

---
 pyfans/micro.cpp | 3 +--
 pyfans/micro.hpp | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp
index eb45ff2..65f8b8b 100644
--- a/pyfans/micro.cpp
+++ b/pyfans/micro.cpp
@@ -36,7 +36,6 @@ MicroSimulation::MicroSimulation(int sim_id, char *input_file)
     matmanager = createMaterialManager<3, 6>(reader);
     solver = createSolver<3, 6>(reader, matmanager);
 
-    g0.resize(6, 0);
 }
 
 MicroSimulation::~MicroSimulation()
@@ -53,7 +52,7 @@ py::dict MicroSimulation::solve(py::dict macro_data, double dt)
     py::array_t<double> strain2 = macro_data["strains4to6"].cast<py::array_t<double>>();
     py::array_t<double> strain = merge_arrays(strain1, strain2);
 
-    std::copy(strain.begin(), strain.end(), g0.begin()); // copy numpy data into the preallocated buffer
+    std::vector<double> g0 = std::vector<double>(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector.
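Aside (editorial): patch 12 above interchanges the `setGradient` loops so that, for each Gauss point `j`, the `n_str` components land contiguously (`g0` is indexed `n_str * j + i`). A tiny standalone sketch of that access pattern, with illustrative sizes:

```cpp
#include <vector>

int main() {
    const int n_gp = 8, n_str = 6;                 // illustrative sizes
    std::vector<double> src = {1, 2, 3, 4, 5, 6};  // stands in for _g0
    std::vector<double> g0(n_gp * n_str, 0.0);     // stands in for the member g0

    for (int j = 0; j < n_gp; ++j)        // outer: Gauss points
        for (int i = 0; i < n_str; ++i)   // inner: contiguous n_str-block per point
            g0[n_str * j + i] = src[i];

    return g0[n_str] == src[0] ? 0 : 1;   // each block repeats src; exit 0 on success
}
```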
VectorXd homogenized_stress; diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp index 51ce762..a8c5bb2 100644 --- a/pyfans/micro.hpp +++ b/pyfans/micro.hpp @@ -30,6 +30,4 @@ class MicroSimulation { MaterialManager<3, 6> *matmanager; Solver<3, 6> *solver; double pert_param = 1e-6; // scalar strain perturbation parameter - - std::vector g0; // gradient buffer, alloc once }; From 9e751eb11088594e3fdb192fd240bc7c03a5a9b1 Mon Sep 17 00:00:00 2001 From: Alex Hocks Date: Mon, 12 Jan 2026 20:26:54 +0100 Subject: [PATCH 14/14] Add rank sync on trace log and impl serialization --- CMakeLists.txt | 1 + include/MaterialManager.h | 17 ++- include/logging.h | 36 ++++- include/material_models/J2Plasticity.h | 11 ++ include/material_models/J2PlasticityNew.h | 9 ++ include/matmodel.h | 37 ++++- include/reader.h | 20 ++- include/serialization.h | 164 ++++++++++++++++++++++ include/solver.h | 39 ++++- pyfans/micro.cpp | 104 +++++++++++--- pyfans/micro.hpp | 8 +- src/logging.cpp | 43 ++++-- src/reader.cpp | 94 ++++++++++++- 13 files changed, 539 insertions(+), 44 deletions(-) create mode 100644 include/serialization.h diff --git a/CMakeLists.txt b/CMakeLists.txt index efb3c1f..91523d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,6 +162,7 @@ set_property(TARGET FANS_FANS PROPERTY PUBLIC_HEADER include/solver.h include/setup.h include/mixedBCs.h + include/serialization.h include/material_models/LinearThermal.h include/material_models/GBDiffusion.h diff --git a/include/MaterialManager.h b/include/MaterialManager.h index 6a9b5ef..a905c97 100644 --- a/include/MaterialManager.h +++ b/include/MaterialManager.h @@ -4,6 +4,7 @@ #include "general.h" #include "logging.h" #include "matmodel.h" +#include "serialization.h" template Matmodel *createMatmodel(const Reader &reader); @@ -26,7 +27,7 @@ struct MaterialInfo { * */ template -class MaterialManager { +class MaterialManager : public Serializable { private: MaterialInfo *phase_to_info{nullptr}; // [n_phases] - HOT DATA int n_phases; @@ -201,6 +202,20 @@ class MaterialManager { } } + void register_serialization(registry_t &r) override + { + for (auto *model : models) { + model->register_serialization(r); + } + } + + void init_deserialization() override + { + for (auto *model : models) { + model->init_deserialization(); + } + } + // Update internal variables after converged time step void update_internal_variables() { diff --git a/include/logging.h b/include/logging.h index 871ed0a..734a669 100755 --- a/include/logging.h +++ b/include/logging.h @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include "mpi.h" namespace Log { @@ -20,9 +22,33 @@ enum Level { extern Level active_level; +class Logger; +class MPI_TraceSync { + public: + explicit MPI_TraceSync(Logger &log, bool append); + ~MPI_TraceSync(); + + template + MPI_TraceSync &operator<<(const T &v); + MPI_TraceSync &operator<<(std::ostream &(*m)(std::ostream &) ); + + private: + Logger &_log; + bool _append; + std::ostringstream _buffer; +}; + +template +MPI_TraceSync &MPI_TraceSync::operator<<(const T &v) +{ + _buffer << v; + return *this; +} + class Logger { public: - explicit Logger(std::string prefix, int comm_rank, int comm_size); + friend class MPI_TraceSync; + explicit Logger(std::string prefix, int comm_rank, int comm_size, const MPI_Comm& comm); /// error > info > warn > debug > trace std::ostream& error(bool append=false); @@ -33,11 +59,12 @@ class Logger { /// error > info > warn > debug > trace std::ostream& debug(bool append=false); /// error > info > warn > debug > trace - 
std::ostream& trace(bool append=false); + MPI_TraceSync trace(bool append=false); /// progress bar void progress(const std::string& prefix, int step, int max); private: + std::ostream& trace_impl(bool append=false); /// starting time std::chrono::steady_clock::time_point _start_time; /// what the logger should always print first @@ -48,13 +75,14 @@ class Logger { int _comm_rank; /// MPI comm size int _comm_size; - + /// communicator + MPI_Comm _comm; }; /** * Creates all loggers and sets the level * */ -void init(int comm_rank, int comm_size); +void init(int comm_rank, int comm_size, const MPI_Comm& comm); /** * Frees all memory diff --git a/include/material_models/J2Plasticity.h b/include/material_models/J2Plasticity.h index 33f2a69..e657a20 100644 --- a/include/material_models/J2Plasticity.h +++ b/include/material_models/J2Plasticity.h @@ -55,6 +55,17 @@ class J2Plasticity : public SmallStrainMechModel { psi_bar_t.resize(num_elements, Matrix::Zero(6, num_gauss_points)); } + void register_serialization(registry_t &r) override + { + SmallStrainMechModel::register_serialization(r); + for (auto &mat : plasticStrain_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : plasticStrain) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : psi_bar_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : psi_bar) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &vec : psi_t) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + for (auto &vec : psi) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + } + virtual void updateInternalVariables() override { plasticStrain_t = plasticStrain; diff --git a/include/material_models/J2PlasticityNew.h b/include/material_models/J2PlasticityNew.h index e62996e..3f0e26c 100644 --- a/include/material_models/J2PlasticityNew.h +++ b/include/material_models/J2PlasticityNew.h @@ -34,6 +34,15 @@ class J2PlasticityNew : public SmallStrainMechModel { q_t.resize(num_elements, VectorXd::Zero(num_gauss_points)); } + void register_serialization(registry_t &r) override + { + SmallStrainMechModel::register_serialization(r); + for (auto &mat : plasticStrain_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : plasticStrain) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &vec : q_t) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + for (auto &vec : q) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + } + virtual void updateInternalVariables() override { plasticStrain_t = plasticStrain; diff --git a/include/matmodel.h b/include/matmodel.h index a43087f..ffdb1bc 100644 --- a/include/matmodel.h +++ b/include/matmodel.h @@ -3,12 +3,13 @@ #define MATMODEL_H #include "general.h" +#include "serialization.h" template class Solver; template -class Matmodel { +class Matmodel : public Serializable { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW // see http://eigen.tuxfamily.org/dox-devel/group__TopicStructHavingEigenMembers.html @@ -53,6 +54,9 @@ class Matmodel { virtual ~Matmodel() = default; + void register_serialization(registry_t &r) override; + void init_deserialization() override; + protected: double l_e_x; double l_e_y; @@ -101,6 +105,37 @@ Matmodel::Matmodel(const Reader &reader) sigma.resize(n_str * n_gp); } +template +void Matmodel::register_serialization(registry_t &r) +{ + 
//r.emplace_back(&verbosity, sizeof(int), false); + //r.emplace_back(&n_mat, sizeof(int), false); + //r.emplace_back(&n_gp, sizeof(int), false); + + //r.emplace_back(std::data(FE_type), (FE_type.size() + 1) * sizeof(char), true); + //r.emplace_back(std::data(macroscale_loading), macroscale_loading.size() * sizeof(double), true); + + //r.emplace_back(&l_e_x, sizeof(double), false); + //r.emplace_back(&l_e_y, sizeof(double), false); + //r.emplace_back(&l_e_z, sizeof(double), false); + //r.emplace_back(&v_e, sizeof(double), false); + + //for (auto& mat : B_int) r.emplace_back(std::data(mat), mat.size() * sizeof(double), true); + + r.emplace_back(std::data(B), B.size() * sizeof(double), true); + r.emplace_back(std::data(eps), eps.size() * sizeof(double), true); + r.emplace_back(std::data(g0), g0.size() * sizeof(double), true); + r.emplace_back(std::data(sigma), sigma.size() * sizeof(double), true); + r.emplace_back(std::data(res_e), res_e.size() * sizeof(double), true); +} + +template +void Matmodel::init_deserialization() +{ + // need to allocate enough memory first + //FE_type = std::string("\0\0\0\0\0\0"); +} + template void Matmodel::Construct_B() { diff --git a/include/reader.h b/include/reader.h index ef6a87a..e720704 100644 --- a/include/reader.h +++ b/include/reader.h @@ -11,12 +11,21 @@ #include "H5FDmpio.h" #include "mpi.h" +#include "serialization.h" + using namespace std; -class Reader { +class Reader : public Serializable { public: - // Default constructor - Reader() = default; + // Explicit default constructor + Reader(): + force_single_rank(false), communicator(nullptr), n_mat(0), + TOL(0), n_it(0), world_rank(0), world_size(0), alloc_local(0), + local_n0(0), local_0_start(0), local_n1(0), local_1_start(0) + { + manual_serialize = true; + manual_deserialize = true; + } // Destructor to free allocated memory ~Reader(); @@ -65,8 +74,11 @@ class Reader { // void Setup(ptrdiff_t howmany); void ReadInputFile(char input_fn[]); + void ReadJson(json j); void ReadMS(int hm); - void ComputeVolumeFractions(); + void ComputeVolumeFractions(); + length_t serialize_override(buffer_t &buffer, length_t offset) override; + length_t deserialize_override(buffer_t &buffer, length_t offset) override; // void ReadHDF5(char file_name[], char dset_name[]); void safe_create_group(hid_t file, const char *const name); void OpenResultsFile(const char *output_fn); // Open results file once diff --git a/include/serialization.h b/include/serialization.h new file mode 100644 index 0000000..4771241 --- /dev/null +++ b/include/serialization.h @@ -0,0 +1,164 @@ +// +// Created by Alex Hocks on 09/01/26. 
+// + +#ifndef SERIALIZATION_H +#define SERIALIZATION_H +#include "logging.h" + +#include +#include +#include + +template, typename r = std::conditional_t> +class SerializationBuffer : public Base { + // Buffer always has size multiple of 4 byte +public: + using Base::Base; + using size_type = typename Base::size_type; + void resize(size_type new_size) + { + const auto num_bytes = new_size * data_size; + const auto delta_bytes = (4 - (num_bytes % 4)) % 4; + new_size += delta_bytes / data_size; + Base::resize(new_size); + } + + void resize(size_type new_size, const T& value) + { + const auto num_bytes = new_size * data_size; + const auto delta_bytes = (4 - (num_bytes % 4)) % 4; + new_size += delta_bytes / data_size; + Base::resize(new_size, value); + } + + void add_size(size_type delta) { resize(Base::size() + delta); } + + std::vector& as_int_vector() { return *reinterpret_cast*>(this); } + + static SerializationBuffer& from_int_vector(std::vector& vec) { return *reinterpret_cast(&vec); } + +private: + static constexpr auto data_size = sizeof(T); +}; + +class Serializable { + /** + * Serializes any object that implements this. + * Buffer elements will be bytes, but total count will always be padded to be multiple of 4. + * Data is run length encoded. + */ +public: + // run length type + using length_t = uint64_t; + // run length block size + static constexpr auto header_len = sizeof(length_t); + // byte data type + using data_t = uint8_t; + // buffer type + using buffer_t = SerializationBuffer; + // registry type: pointer to data, data size, true: run length encoded, false: direct read write + using reg_entry_t = std::tuple; + using registry_t = std::vector; + + virtual ~Serializable() = default; + + /** + * Serializes this object + * @return serial buffer + */ + buffer_t serialize_full() + { + if (manual_deserialize) { + buffer_t buffer; + serialize_override(buffer, 0); + return buffer; + } + + Log::io->trace() << "Serialization::serialize_full() - register_serialization\n"; + registry_t registry; + register_serialization(registry); + + Log::io->trace() << "Serialization::serialize_full() - accumulate\n"; + const length_t total_size = std::accumulate(registry.begin(), registry.end(), 0UL, + [](const length_t acc, const reg_entry_t &e) { + const bool use_run_length = std::get<2>(e); + length_t size = std::get<1>(e); + if (use_run_length) size += header_len; + return acc + size; + }); + + buffer_t buffer; + buffer.resize(total_size); + Log::io->trace() << "Serialization::serialize_full() - buffer size=" << total_size << " addr=" << static_cast(std::data(buffer)) << "\n"; + + Log::io->trace() << "Serialization::serialize_full() - process entries\n"; + length_t offset = 0UL; + length_t pos = 0UL; + for (const reg_entry_t &e : registry) { + const bool use_run_length = std::get<2>(e); + length_t size = std::get<1>(e); + void* data = std::get<0>(e); + Log::io->trace() << "Serialization::serialize_full() - entry: pos=" << pos << " addr=" << data << " offset=" << offset << " useRL=" << use_run_length << " size=" << size << "\n"; + + if (use_run_length) { + *reinterpret_cast(std::data(buffer) + offset) = size; + offset += header_len; + } + std::memcpy(std::data(buffer) + offset, data, size); + offset += size; + pos++; + } + + Log::io->trace() << "Serialization::serialize_full() - done\n"; + return buffer; + } + + virtual void register_serialization(registry_t &r) {} + + virtual void init_deserialization() {} + + virtual registry_t::iterator late_init_deserialization(registry_t &r, 
registry_t::iterator begin) { return r.begin(); } + + bool manual_deserialize = false; + bool manual_serialize = false; + + /** + * Deserialize this object using provided data + * @param buffer + */ + void deserialize_full(buffer_t &buffer) + { + if (manual_deserialize) { + deserialize_override(buffer, 0); + return; + } + + init_deserialization(); // alloc mem if needed + + // gather serialization description + registry_t registry; + register_serialization(registry); + // place correct lengths into descriptor list, etc + late_init_deserialization(registry, registry.begin()); + + length_t offset = 0UL; + for (reg_entry_t &e : registry) { + const bool use_run_length = std::get<2>(e); + length_t &size = std::get<1>(e); + void* data = std::get<0>(e); + + if (use_run_length) { + size = *reinterpret_cast(std::data(buffer) + offset); + offset += header_len; + } + std::memcpy(data, std::data(buffer) + offset, size); + offset += size; + } + } + + virtual length_t deserialize_override(buffer_t &buffer, length_t offset) { return offset; } + virtual length_t serialize_override(buffer_t &buffer, length_t offset) { return offset; } +}; + +#endif diff --git a/include/solver.h b/include/solver.h index 82539de..6cd7c37 100644 --- a/include/solver.h +++ b/include/solver.h @@ -4,15 +4,16 @@ #include "matmodel.h" #include "MaterialManager.h" #include "logging.h" +#include "serialization.h" class J2Plasticity; typedef Map, Unaligned, OuterStride<>> RealArray; template -class Solver : private MixedBCController { +class Solver : MixedBCController, public Serializable { public: - Solver(Reader &reader, MaterialManager *matmanager); + explicit Solver(Reader &reader, MaterialManager *matmanager=nullptr); virtual ~Solver(); Reader &reader; @@ -45,6 +46,10 @@ class Solver : private MixedBCController { ArrayXd err_all; //!< Absolute error history Matrix fundamentalSolution; + void init_fundamentalSolutionBuffer() + { + fundamentalSolution = Matrix(howmany, (local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2); + } template void iterateCubes(F f); @@ -64,6 +69,8 @@ class Solver : private MixedBCController { double compute_error(RealArray &r); void CreateFFTWPlans(double *in, fftw_complex *transformed, double *out); + void register_serialization(registry_t &r) override; + VectorXd homogenized_stress; VectorXd get_homogenized_stress(); @@ -133,15 +140,19 @@ Solver::Solver(Reader &reader, MaterialManager * rhat((std::complex *) v_r, local_n1 * n_x * (n_z / 2 + 1) * howmany), // actual initialization is below buffer_padding(fftw_alloc_real(n_y * (n_z + 2) * howmany)) { + Log::solver->trace() << "Start constructing solver\n"; + v_u_real.setZero(); for (ptrdiff_t i = local_n0 * n_y * n_z * howmany; i < (local_n0 + 1) * n_y * n_z * howmany; i++) { this->v_u[i] = 0; } std::memset(v_u_prev, 0, local_n0 * n_y * n_z * howmany * sizeof(double)); - matmanager->initialize_internal_variables(local_n0 * n_y * n_z, matmanager->models[0]->n_gp); - - computeFundamentalSolution(); + if (matmanager != nullptr) { + Log::solver->trace() << "Init internal vars.\n"; + matmanager->initialize_internal_variables(local_n0 * n_y * n_z, matmanager->models[0]->n_gp); + computeFundamentalSolution(); + } } template @@ -162,7 +173,7 @@ void Solver::computeFundamentalSolution() Matrix, 8, 1> A; Matrix AA; Matrix block; - fundamentalSolution = Matrix(howmany, (local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2); + init_fundamentalSolutionBuffer(); fundamentalSolution.setZero(); for (int i_y = 0; i_y < local_n1; ++i_y) { @@ -206,6 +217,7 @@ void 
Solver::computeFundamentalSolution() template void Solver::CreateFFTWPlans(double *in, fftw_complex *transformed, double *out) { + Log::solver->trace() << "Solver::CreateFFTWPlans() begin \n"; int rank = 3; ptrdiff_t iblock = FFTW_MPI_DEFAULT_BLOCK; ptrdiff_t oblock = FFTW_MPI_DEFAULT_BLOCK; @@ -225,6 +237,21 @@ void Solver::CreateFFTWPlans(double *in, fftw_complex *transform new (&rhat) Map((std::complex *) transformed, local_n1 * n_x * (n_z / 2 + 1) * howmany); } +template +void Solver::register_serialization(registry_t &r) +{ + Log::io->trace() << "Solver::register_serialization v_r size=" << std::max(reader.alloc_local * 2, (local_n0 + 1) * n_y * (n_z + 2) * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_r, std::max(reader.alloc_local * 2, (local_n0 + 1) * n_y * (n_z + 2) * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization v_u size=" << (local_n0 * n_y * n_z * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_u, (local_n0 * n_y * n_z * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization v_u_prev size=" << (local_n0 * n_y * n_z * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_u_prev, (local_n0 * n_y * n_z * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization rhat size=" << (local_n1 * n_x * (n_z / 2 + 1) * howmany) * sizeof(std::complex) << "\n"; + r.emplace_back(std::data(rhat), (local_n1 * n_x * (n_z / 2 + 1) * howmany) * sizeof(std::complex), true); + Log::io->trace() << "Solver::register_serialization fundamentalSolution size=" << ((local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2) * sizeof(double) << "\n"; + r.emplace_back(std::data(fundamentalSolution), ((local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2) * sizeof(double), true); +} + // TODO: possibly circumvent the padding problem by accessing r as a matrix? template template diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp index 81c164a..4e82f22 100644 --- a/pyfans/micro.cpp +++ b/pyfans/micro.cpp @@ -25,25 +25,23 @@ py::array_t merge_arrays(py::array_t array1, py::array_t return result; } -MicroSimulation::MicroSimulation(int sim_id, char *input_file) +MicroSimulation::MicroSimulation(int sim_id, bool late_init, char *input_file) : _sim_id(sim_id) { // initialize fftw mpi fftw_mpi_init(); - // Input file name is hardcoded. TODO: Make it configurable - reader.ReadInputFile(input_file); - - reader.force_single_rank = true; - reader.communicator = MPI_COMM_SELF; - - reader.ReadMS(3); - - if (reader.strain_type == "small") { - matmanager = createMaterialManager<3, 6>(reader); - solver = createSolver<3, 6>(reader, std::get *>(matmanager)); - } else { - matmanager = createMaterialManager<3, 9>(reader); - solver = createSolver<3, 9>(reader, std::get *>(matmanager)); + if (not late_init || sim_id >= 0) { + // Input file name is hardcoded. 
TODO: Make it configurable + reader.ReadInputFile(input_file); + reader.ReadMS(3); + + if (reader.strain_type == "small") { + matmanager = createMaterialManager<3, 6>(reader); + solver = createSolver<3, 6>(reader, std::get *>(matmanager)); + } else { + matmanager = createMaterialManager<3, 9>(reader); + solver = createSolver<3, 9>(reader, std::get *>(matmanager)); + } } } @@ -114,6 +112,63 @@ py::dict MicroSimulation::solve(py::dict macro_data, double dt) return micro_write_data; } +py::dict MicroSimulation::get_state() +{ + py::dict state; + + Log::io->debug() << "Serializing reader\n"; + Serializable::buffer_t reader_state = reader.serialize_full(); + state["reader_state"] = reader_state.as_int_vector(); + Log::io->debug() << "Serializing solver\n"; + Serializable::buffer_t solver_state = std::visit([](auto &s) { return s->serialize_full(); }, solver); + state["solver_state"] = solver_state.as_int_vector(); + Log::io->debug() << "Serializing material manager\n"; + Serializable::buffer_t matmngr_state = std::visit([](auto &m) { return m->serialize_full(); }, matmanager); + state["matmanager_state"] = matmngr_state.as_int_vector(); + Log::io->debug() << "Serializing micro sim done\n"; + + return state; +} + +void MicroSimulation::set_state(py::dict &state) +{ + auto py_reader_state = state["reader_state"].cast>(); + std::vector reader_state_i(py_reader_state.data(), py_reader_state.data() + py_reader_state.size()); + Serializable::buffer_t &reader_state = Serializable::buffer_t::from_int_vector(reader_state_i); + + auto py_solver_state = state["solver_state"].cast>(); + std::vector solver_state_i(py_solver_state.data(), py_solver_state.data() + py_solver_state.size()); + Serializable::buffer_t &solver_state = Serializable::buffer_t::from_int_vector(solver_state_i); + + auto py_matmngr_state = state["matmanager_state"].cast>(); + std::vector matmngr_state_i(py_matmngr_state.data(), py_matmngr_state.data() + py_matmngr_state.size()); + Serializable::buffer_t &matmngr_state = Serializable::buffer_t::from_int_vector(matmngr_state_i); + + reader.deserialize_full(reader_state); + if (reader.strain_type == "small") { + auto *mat_ptr = createMaterialManager<3, 6>(reader); + mat_ptr->deserialize_full(matmngr_state); + matmanager = mat_ptr; + auto *sol_ptr = createSolver<3, 6>(reader, nullptr); + sol_ptr->init_fundamentalSolutionBuffer(); + sol_ptr->matmanager = std::get *>(matmanager); + sol_ptr->deserialize_full(solver_state); + solver = sol_ptr; + + } else { + auto *mat_ptr = createMaterialManager<3, 9>(reader); + mat_ptr->deserialize_full(matmngr_state); + matmanager = mat_ptr; + auto *sol_ptr = createSolver<3, 9>(reader, nullptr); + sol_ptr->init_fundamentalSolutionBuffer(); + sol_ptr->matmanager = std::get *>(matmanager); + sol_ptr->deserialize_full(solver_state); + solver = sol_ptr; + } +} + +int MicroSimulation::get_id() { return _sim_id; } + PYBIND11_MODULE(PyFANS, m) { // optional docstring @@ -121,5 +176,22 @@ PYBIND11_MODULE(PyFANS, m) py::class_(m, "MicroSimulation") .def(py::init()) - .def("solve", &MicroSimulation::solve); + .def("solve", &MicroSimulation::solve) + .def(py::pickle( + [](MicroSimulation& m) { + return py::make_tuple(m.get_id(), m.get_state()); + }, + [](py::tuple t) { + int id = t[0].cast(); + py::dict state = t[1].cast(); + + MicroSimulation* m = new MicroSimulation(id, true); + m->set_state(state); + return m; + } + )) + .def("set_state", &MicroSimulation::set_state) + .def("get_state", &MicroSimulation::get_state) + .def("get_global_id", &MicroSimulation::get_id) + 
diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp
index 0e7293e..db83a04 100644
--- a/pyfans/micro.hpp
+++ b/pyfans/micro.hpp
@@ -15,15 +15,21 @@
 #include "matmodel.h"
 #include "MaterialManager.h"
 #include "solver.h"
+#include "serialization.h"
 
 namespace py = pybind11;
 
 class MicroSimulation
 {
   public:
-    MicroSimulation(int sim_id, char *input_file = "input.json");
+    MicroSimulation(int sim_id, bool late_init = false, char *input_file = "input.json");
     ~MicroSimulation();
     py::dict solve(py::dict macro_write_data, double dt);
 
+    py::dict get_state();
+    void     set_state(py::dict &state);
+
+    int get_id();
+
   private:
     int    _sim_id;
     Reader reader;
diff --git a/src/logging.cpp b/src/logging.cpp
index 9a8a354..cc36aa6 100755
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -6,7 +6,29 @@
 
 int active_rank = 0;
 
-Log::Logger::Logger(std::string prefix, int comm_rank, int comm_size) : _prefix(std::move(prefix)), _nullstr(), _comm_rank(comm_rank), _comm_size(comm_size) {
+Log::MPI_TraceSync::MPI_TraceSync(Logger &log, bool append) : _log(log), _append(append) {}
+
+Log::MPI_TraceSync::~MPI_TraceSync()
+{
+    // On destruction, flush the buffered message rank by rank so trace
+    // output from different MPI ranks is not interleaved.
+    if (active_level >= All) {
+        MPI_Barrier(_log._comm);
+        const int size = _log._comm_size;
+        for (int i = 0; i < size; i++) {
+            Log::setActiveRank(i);
+            MPI_Barrier(_log._comm);
+            _log.trace_impl(_append) << _buffer.str() << std::flush;
+        }
+        Log::setActiveRank(0);
+        MPI_Barrier(_log._comm);
+    }
+}
+
+Log::MPI_TraceSync &Log::MPI_TraceSync::operator<<(std::ostream &(*m)(std::ostream &))
+{
+    _buffer << m;
+    return *this;
+}
+
+Log::Logger::Logger(std::string prefix, int comm_rank, int comm_size, const MPI_Comm &comm) : _prefix(std::move(prefix)), _nullstr(), _comm_rank(comm_rank), _comm_size(comm_size), _comm(comm) {
     _start_time = std::chrono::steady_clock::now();
     _nullstr.setstate(std::ios_base::badbit);
 }
@@ -55,16 +77,21 @@ std::ostream &Log::Logger::debug(bool append) {
     } else return _nullstr;
 }
 
-std::ostream &Log::Logger::trace(bool append) {
+Log::MPI_TraceSync Log::Logger::trace(bool append) {
+    return Log::MPI_TraceSync(*this, append);
+}
+
+std::ostream &Log::Logger::trace_impl(bool append) {
     if (active_level >= All && _comm_rank == active_rank) {
         if (append)
            return std::cout;
-        std::cout << _prefix;
+        std::cout << _prefix << "[" << active_rank << "] ";
        auto now         = std::chrono::steady_clock::now();
        auto elapse_time = static_cast<double>(std::chrono::duration_cast<std::chrono::milliseconds>(now - _start_time).count()) / 1000.0;
        std::cout << elapse_time << ": ";
        return std::cout;
-    } else return _nullstr;
+    } else return _nullstr;
 }
+
 void Log::Logger::progress(const std::string &prefix, int step, int max) {
     if (_comm_rank != active_rank)
         return;
@@ -100,7 +127,7 @@ void Log::Logger::progress(const std::string &prefix, int step, int max) {
     std::cout.flush();
 }
 
-void Log::init(int comm_rank, int comm_size) {
+void Log::init(int comm_rank, int comm_size, const MPI_Comm &comm) {
     Level level = Log::Error;
     if constexpr (VERBOSITY <= 0) level = Error;
     if constexpr (VERBOSITY == 1) level = Info;
@@ -109,9 +136,9 @@ void Log::init(int comm_rank, int comm_size) {
     if constexpr (VERBOSITY >= 4) level = All;
     active_level = level;
 
-    general = std::make_unique<Logger>("[FANS-GENERAL] ", comm_rank, comm_size);
-    solver  = std::make_unique<Logger>("[FANS-SOLVER] ", comm_rank, comm_size);
-    io      = std::make_unique<Logger>("[FANS-IO] ", comm_rank, comm_size);
+    general = std::make_unique<Logger>("[FANS-GENERAL] ", comm_rank, comm_size, comm);
+    solver  = std::make_unique<Logger>("[FANS-SOLVER] ", comm_rank, comm_size, comm);
+    io      = std::make_unique<Logger>("[FANS-IO] ", comm_rank, comm_size, comm);
 }
 
 void Log::finalize() {
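The MPI_TraceSync destructor implements barrier-synchronized, rank-ordered flushing: every rank buffers its message locally, then ranks take turns writing while the others wait at a barrier. A stand-alone sketch of the same round-robin technique (plain MPI, no FANS types):

    #include <mpi.h>
    #include <iostream>
    #include <sstream>

    int main(int argc, char **argv) {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        // Buffer the message instead of writing to std::cout immediately.
        std::ostringstream buffer;
        buffer << "trace from rank " << rank << "\n";

        // Ranks flush one at a time, in rank order.
        for (int i = 0; i < size; i++) {
            MPI_Barrier(MPI_COMM_WORLD);
            if (rank == i)
                std::cout << buffer.str() << std::flush;
        }
        MPI_Barrier(MPI_COMM_WORLD);

        MPI_Finalize();
        return 0;
    }

Note that the MPI standard does not strictly guarantee the ordering of stdout forwarded from different ranks; the barriers make interleaving unlikely in practice, which is sufficient for trace-level logging.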
diff --git a/src/reader.cpp b/src/reader.cpp
index 5734324..b1fe343 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -57,12 +57,24 @@ void Reader::ComputeVolumeFractions()
     }
 }
 
-void Reader ::ReadInputFile(char input_fn[])
+void Reader::ReadInputFile(char input_fn[])
 {
     try {
         ifstream i(input_fn);
         json     j;
         i >> j;
+        ReadJson(j);
+
+    } catch (const std::exception &e) {
+        Log::io->error() << "ERROR trying to read input file '" << input_fn << "' for FANS\n";
+        Log::finalize();
+        exit(10);
+    }
+}
+
+void Reader::ReadJson(json j)
+{
+    try {
         inputJson = j; // Store complete input JSON for MaterialManager
 
         if (j.contains("no_mpi")) {
@@ -75,7 +87,8 @@ void Reader ::ReadInputFile(char input_fn[])
     MPI_Comm_rank(communicator, &world_rank);
     MPI_Comm_size(communicator, &world_size);
 
-    Log::init(world_rank, world_size);
+    Log::init(world_rank, world_size, communicator);
+    Log::io->trace() << "Running with total ranks=" << world_size << ", rank=" << world_rank << "\n";
 
     microstructure = j["microstructure"];
     strcpy(ms_filename, microstructure["filepath"].get<std::string>().c_str());
@@ -165,7 +178,7 @@ void Reader ::ReadInputFile(char input_fn[])
     Log::io->info() << "# Max iterations: \t " << n_it << "\n";
 
     } catch (const std::exception &e) {
-        Log::io->error() << "ERROR trying to read input file '" << input_fn << "' for FANS\n";
+        Log::io->error() << "ERROR trying to read input json for FANS\n";
         Log::finalize();
         exit(10);
     }
@@ -381,6 +394,7 @@ void Reader ::ReadMS(int hm)
         FANS_free(tmp);
     } else {
         /* XYZ case: the slab is already in correct order */
+        FANS_free(ms); // free the old allocation to avoid a leak
         ms = tmp;      // steal the buffer; no copy
     }
 
@@ -396,6 +410,80 @@ void Reader ::ReadMS(int hm)
     this->ComputeVolumeFractions();
 }
 
+Reader::length_t Reader::serialize_override(buffer_t &buffer, length_t offset)
+{
+    // write JSON
+    std::string j_str      = inputJson.dump();
+    length_t    block_size = (j_str.size() + 1) * sizeof(char);
+    buffer.add_size(block_size + header_len);
+    *reinterpret_cast<length_t *>(std::data(buffer) + offset) = block_size;
+    offset += header_len;
+    std::memcpy(std::data(buffer) + offset, j_str.c_str(), block_size);
+    offset += block_size;
+
+    // write ms metadata
+    block_size = sizeof(bool) +
+                 3 * sizeof(int) +
+                 3 * sizeof(double) +
+                 5 * sizeof(ptrdiff_t) +
+                 sizeof(int);
+    buffer.add_size(block_size);
+    *reinterpret_cast<bool *>(std::data(buffer) + offset) = is_zyx;               offset += sizeof(bool);
+    std::memcpy(std::data(buffer) + offset, std::data(dims), sizeof(int) * 3);    offset += sizeof(int) * 3;
+    std::memcpy(std::data(buffer) + offset, std::data(l_e), sizeof(double) * 3);  offset += sizeof(double) * 3;
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = alloc_local;     offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_n0;        offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_0_start;   offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_n1;        offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_1_start;   offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<int *>(std::data(buffer) + offset) = n_mat;                 offset += sizeof(int);
+
+    // write ms voxel data
+    block_size = local_n0 * dims[1] * dims[2] * sizeof(unsigned short);
+    buffer.add_size(block_size);
+    std::memcpy(std::data(buffer) + offset, ms, block_size);
+    offset += block_size;
+
+    return offset;
+}
+
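serialize_override writes one flat byte stream: a length_t header for the variable-sized JSON block, then a fixed-size metadata block, then the raw voxel array, advancing a single offset; deserialize_override below walks the same layout in the same order. A reduced stand-alone sketch of this length-prefixed layout (simplified types; std::uint64_t stands in for length_t, std::vector<char> for Serializable::buffer_t):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    using length_t = std::uint64_t;

    int main() {
        std::string payload = "{\"strain_type\": \"small\"}";
        double      l_e[3]  = {1.0, 1.0, 1.0};

        // Serialize: [length header][payload bytes][fixed-size POD block].
        std::vector<char> buffer(sizeof(length_t) + payload.size() + sizeof(l_e));
        std::size_t offset     = 0;
        length_t    block_size = payload.size();
        std::memcpy(buffer.data() + offset, &block_size, sizeof(block_size));  offset += sizeof(block_size);
        std::memcpy(buffer.data() + offset, payload.data(), block_size);       offset += block_size;
        std::memcpy(buffer.data() + offset, l_e, sizeof(l_e));                 offset += sizeof(l_e);

        // Deserialize: read the header first, then consume exactly that many bytes.
        offset = 0;
        length_t in_size;
        std::memcpy(&in_size, buffer.data() + offset, sizeof(in_size));        offset += sizeof(in_size);
        std::string in_payload(buffer.data() + offset, in_size);               offset += in_size;
        double in_l_e[3];
        std::memcpy(in_l_e, buffer.data() + offset, sizeof(in_l_e));

        assert(in_payload == payload && in_l_e[2] == l_e[2]);
        return 0;
    }

The sketch uses std::memcpy for the header deliberately: the *reinterpret_cast<length_t *> stores in the patch assume the buffer offsets are suitably aligned, which std::memcpy does not require.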
+Reader::length_t Reader::deserialize_override(buffer_t &buffer, length_t offset)
+{
+    // read JSON
+    length_t block_size = *reinterpret_cast<length_t *>(std::data(buffer) + offset);
+    offset += header_len;
+    std::string tmp;
+    tmp.resize(block_size / sizeof(char), '\0');
+    std::memcpy(std::data(tmp), std::data(buffer) + offset, block_size);
+    offset += block_size;
+
+    std::istringstream iss(tmp);
+    json j;
+    iss >> j;
+    ReadJson(j);
+
+    // read ms metadata
+    is_zyx = *reinterpret_cast<bool *>(std::data(buffer) + offset);               offset += sizeof(bool);
+    dims.resize(3);
+    std::memcpy(std::data(dims), std::data(buffer) + offset, sizeof(int) * 3);    offset += sizeof(int) * 3;
+    l_e.resize(3);
+    std::memcpy(std::data(l_e), std::data(buffer) + offset, sizeof(double) * 3);  offset += sizeof(double) * 3;
+    alloc_local   = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset);   offset += sizeof(ptrdiff_t);
+    local_n0      = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset);   offset += sizeof(ptrdiff_t);
+    local_0_start = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset);   offset += sizeof(ptrdiff_t);
+    local_n1      = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset);   offset += sizeof(ptrdiff_t);
+    local_1_start = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset);   offset += sizeof(ptrdiff_t);
+    n_mat         = *reinterpret_cast<int *>(std::data(buffer) + offset);         offset += sizeof(int);
+
+    // read ms voxel data
+    block_size = local_n0 * dims[1] * dims[2] * sizeof(unsigned short);
+    ms         = FANS_malloc<unsigned short>(static_cast<size_t>(local_n0) *
+                                             static_cast<size_t>(dims[1]) *
+                                             static_cast<size_t>(dims[2]));
+    std::memcpy(ms, std::data(buffer) + offset, block_size);
+    offset += block_size;
+
+    return offset;
+}
+
 void Reader::OpenResultsFile(const char *output_fn)
 {
     std::snprintf(results_filename, sizeof(results_filename), "%s", output_fn);