diff --git a/CHANGELOG.md b/CHANGELOG.md
index 104794915..f5442ff83 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,9 @@ The format of this changelog is based on
   - Added adaptive time-stepping capability for transient simulations. The new ODE
     integrators rely on the SUNDIALS library and can be specified by setting the
     `config["Solver"]["Transient"]["Type"]` option to `"CVODE"` or `"ARKODE"`.
+  - Added an option to use the complex-valued system matrix for the coarse level solve (sparse
+    direct solve) instead of the real-valued approximation. This can be specified with
+    `config["Solver"]["Linear"]["ComplexCoarseSolve"]`.
 
 ## [0.13.0] - 2024-05-20
 
diff --git a/docs/src/config/solver.md b/docs/src/config/solver.md
index 4120c4043..87cf8c7ef 100644
--- a/docs/src/config/solver.md
+++ b/docs/src/config/solver.md
@@ -322,6 +322,7 @@ under the directory specified by
     "MGSmoothOrder": ,
     "PCMatReal": ,
     "PCMatShifted": ,
+    "ComplexCoarseSolve": ,
     "PCSide": ,
     "DivFreeTol": ,
     "DivFreeMaxIts": ,
@@ -420,6 +421,9 @@ domain problems using a positive definite approximation of the system matrix by
 the sign for the mass matrix contribution, which can help performance at high frequencies
 (relative to the lowest nonzero eigenfrequencies of the model).
 
+`"ComplexCoarseSolve" [false]` : When set to `true`, the coarse-level solver uses the true
+complex-valued system matrix. When set to `false`, the real-valued approximation is used.
+
 `"PCSide" ["Default"]` : Side for preconditioning. Not all options are available for all
 iterative solver choices, and the default choice depends on the iterative solver used.
 
diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp
index c4c628d28..18920c04d 100644
--- a/palace/drivers/basesolver.cpp
+++ b/palace/drivers/basesolver.cpp
@@ -226,7 +226,7 @@ void BaseSolver::SolveEstimateMarkRefine(std::vector> &mes
 
   // Optionally rebalance and write the adapted mesh to file.
   {
-    const auto ratio_pre = mesh::RebalanceMesh(*mesh.back(), iodata);
+    const auto ratio_pre = mesh::RebalanceMesh(iodata, *mesh.back());
     if (ratio_pre > refinement.maximum_imbalance)
     {
       int min_elem, max_elem;
diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp
index 092ead72a..47a0c5f36 100644
--- a/palace/linalg/gmg.hpp
+++ b/palace/linalg/gmg.hpp
@@ -62,7 +62,7 @@ class GeometricMultigridSolver : public Solver
                            const std::vector *G, int cycle_it, int smooth_it,
                            int cheby_order, double cheby_sf_max, double cheby_sf_min,
                            bool cheby_4th_kind);
-  GeometricMultigridSolver(MPI_Comm comm, const IoData &iodata,
+  GeometricMultigridSolver(const IoData &iodata, MPI_Comm comm,
                            std::unique_ptr> &&coarse_solver,
                            const std::vector &P,
                            const std::vector *G = nullptr)
diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp
index f1e70f9d9..db8b088a5 100644
--- a/palace/linalg/ksp.cpp
+++ b/palace/linalg/ksp.cpp
@@ -23,8 +23,8 @@ namespace
 {
 
 template 
-std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm,
-                                        const IoData &iodata)
+std::unique_ptr> ConfigureKrylovSolver(const IoData &iodata,
+                                        MPI_Comm comm)
 {
   // Create the solver.
   std::unique_ptr> ksp;
@@ -114,7 +114,7 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm,
 }
 
 template 
-auto MakeWrapperSolver(U &&...args)
+auto MakeWrapperSolver(const IoData &iodata, U &&...args)
 {
   // Sparse direct solver types copy the input matrix, so there is no need to save the
   // parallel assembled operator.
@@ -131,12 +131,13 @@ auto MakeWrapperSolver(U &&...args)
 #endif
       false);
   return std::make_unique>(
-      std::make_unique(std::forward(args)...), save_assembled);
+      std::make_unique(iodata, std::forward(args)...), save_assembled,
+      iodata.solver.linear.complex_coarse_solve);
 }
 
 template 
 std::unique_ptr>
-ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
+ConfigurePreconditionerSolver(const IoData &iodata, MPI_Comm comm,
                               FiniteElementSpaceHierarchy &fespaces,
                               FiniteElementSpaceHierarchy *aux_fespaces)
 {
@@ -161,7 +162,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
       break;
     case config::LinearSolverData::Type::SUPERLU:
 #if defined(MFEM_USE_SUPERLU)
-      pc = MakeWrapperSolver(comm, iodata, print);
+      pc = MakeWrapperSolver(iodata, comm, print);
 #else
       MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a "
                  "different solver!");
@@ -169,7 +170,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
       break;
     case config::LinearSolverData::Type::STRUMPACK:
 #if defined(MFEM_USE_STRUMPACK)
-      pc = MakeWrapperSolver(comm, iodata, print);
+      pc = MakeWrapperSolver(iodata, comm, print);
 #else
       MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a "
                  "different solver!");
@@ -177,7 +178,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
      break;
    case config::LinearSolverData::Type::STRUMPACK_MP:
 #if defined(MFEM_USE_STRUMPACK)
-      pc = MakeWrapperSolver(comm, iodata, print);
+      pc = MakeWrapperSolver(iodata, comm, print);
 #else
       MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a "
                  "different solver!");
@@ -185,7 +186,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
       break;
     case config::LinearSolverData::Type::MUMPS:
 #if defined(MFEM_USE_MUMPS)
-      pc = MakeWrapperSolver(comm, iodata, print);
+      pc = MakeWrapperSolver(iodata, comm, print);
 #else
       MFEM_ABORT(
           "Solver was not built with MUMPS support, please choose a different solver!");
@@ -214,12 +215,12 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
                 "primary space and auxiliary spaces for construction!");
       const auto G = fespaces.GetDiscreteInterpolators(*aux_fespaces);
       return std::make_unique>(
-          comm, iodata, std::move(pc), fespaces.GetProlongationOperators(), &G);
+          iodata, comm, std::move(pc), fespaces.GetProlongationOperators(), &G);
     }
     else
     {
       return std::make_unique>(
-          comm, iodata, std::move(pc), fespaces.GetProlongationOperators());
+          iodata, comm, std::move(pc), fespaces.GetProlongationOperators());
     }
   }();
   gmg->EnableTimer();  // Enable timing for primary geometric multigrid solver
@@ -238,9 +239,9 @@ BaseKspSolver::BaseKspSolver(const IoData &iodata,
                              FiniteElementSpaceHierarchy &fespaces,
                              FiniteElementSpaceHierarchy *aux_fespaces)
   : BaseKspSolver(
-        ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata),
-        ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(),
-                                      iodata, fespaces, aux_fespaces))
+        ConfigureKrylovSolver(iodata, fespaces.GetFinestFESpace().GetComm()),
+        ConfigurePreconditionerSolver(
+            iodata, fespaces.GetFinestFESpace().GetComm(), fespaces, aux_fespaces))
 {
   use_timer = true;
 }
diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp
index fa054e9b1..693b837af 100644
--- a/palace/linalg/mumps.hpp
+++ b/palace/linalg/mumps.hpp
@@ -21,7 +21,7 @@ class MumpsSolver : public mfem::MUMPSSolver
 public:
   MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym,
               config::LinearSolverData::SymFactType reorder, double blr_tol, int print);
-  MumpsSolver(MPI_Comm comm, const IoData &iodata, int print)
+  MumpsSolver(const IoData &iodata, MPI_Comm comm, int print)
     : MumpsSolver(comm,
                   (iodata.solver.linear.pc_mat_shifted ||
                    iodata.problem.type == config::ProblemData::Type::TRANSIENT ||
diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp
index 7ea33e07b..222bfeb54 100644
--- a/palace/linalg/solver.cpp
+++ b/palace/linalg/solver.cpp
@@ -52,7 +52,27 @@ void MfemWrapperSolver::SetOperator(const ComplexOperator &op)
   }
   if (hAr && hAi)
   {
-    A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    if (complex_matrix)
+    {
+      // A = [Ar, -Ai]
+      //     [Ai,  Ar]
+      mfem::Array2D blocks(2, 2);
+      mfem::Array2D block_coeffs(2, 2);
+      blocks(0, 0) = hAr;
+      blocks(0, 1) = hAi;
+      blocks(1, 0) = hAi;
+      blocks(1, 1) = hAr;
+      block_coeffs(0, 0) = 1.0;
+      block_coeffs(0, 1) = -1.0;
+      block_coeffs(1, 0) = 1.0;
+      block_coeffs(1, 1) = 1.0;
+      A.reset(mfem::HypreParMatrixFromBlocks(blocks, &block_coeffs));
+    }
+    else
+    {
+      // A = Ar + Ai.
+      A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi));
+    }
     if (PtAPr)
     {
       PtAPr->StealParallelAssemble();
@@ -101,13 +121,33 @@ template <>
 void MfemWrapperSolver::Mult(const ComplexVector &x,
                              ComplexVector &y) const
 {
-  mfem::Array X(2);
-  mfem::Array Y(2);
-  X[0] = &x.Real();
-  X[1] = &x.Imag();
-  Y[0] = &y.Real();
-  Y[1] = &y.Imag();
-  pc->ArrayMult(X, Y);
+  if (pc->Height() == x.Size())
+  {
+    mfem::Array X(2);
+    mfem::Array Y(2);
+    X[0] = &x.Real();
+    X[1] = &x.Imag();
+    Y[0] = &y.Real();
+    Y[1] = &y.Imag();
+    pc->ArrayMult(X, Y);
+  }
+  else
+  {
+    const int Nx = x.Size(), Ny = y.Size();
+    Vector X(2 * Nx), Y(2 * Ny), yr, yi;
+    X.UseDevice(true);
+    Y.UseDevice(true);
+    yr.UseDevice(true);
+    yi.UseDevice(true);
+    linalg::SetSubVector(X, 0, x.Real());
+    linalg::SetSubVector(X, Nx, x.Imag());
+    pc->Mult(X, Y);
+    Y.ReadWrite();
+    yr.MakeRef(Y, 0, Ny);
+    yi.MakeRef(Y, Ny, Ny);
+    y.Real() = yr;
+    y.Imag() = yi;
+  }
 }
 
 } // namespace palace
diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp
index 3dcc1096a..38ef6b452 100644
--- a/palace/linalg/solver.hpp
+++ b/palace/linalg/solver.hpp
@@ -76,17 +76,22 @@ class MfemWrapperSolver : public Solver
   // The actual mfem::Solver.
   std::unique_ptr pc;
 
-  // Real-valued system matrix A = Ar + Ai in parallel assembled form.
+  // System matrix A in parallel assembled form.
   std::unique_ptr A;
 
   // Whether or not to save the parallel assembled matrix after calling
   // mfem::Solver::SetOperator (some solvers copy their input).
   bool save_assembled;
 
+  // Whether to use the exact complex-valued system matrix or the real-valued
+  // approximation A = Ar + Ai.
+  bool complex_matrix = true;
+
 public:
-  MfemWrapperSolver(std::unique_ptr &&pc, bool save_assembled = true)
+  MfemWrapperSolver(std::unique_ptr &&pc, bool save_assembled = true,
+                    bool complex_matrix = true)
     : Solver(pc->iterative_mode), pc(std::move(pc)),
-      save_assembled(save_assembled)
+      save_assembled(save_assembled), complex_matrix(complex_matrix)
   {
   }
 
diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp
index f1d17c979..fa17c5f25 100644
--- a/palace/linalg/strumpack.hpp
+++ b/palace/linalg/strumpack.hpp
@@ -28,7 +28,7 @@ class StrumpackSolverBase : public StrumpackSolverType
                       config::LinearSolverData::CompressionType compression, double lr_tol,
                       int butterfly_l, int lossy_prec, int print);
 
-  StrumpackSolverBase(MPI_Comm comm, const IoData &iodata, int print)
+  StrumpackSolverBase(const IoData &iodata, MPI_Comm comm, int print)
     : StrumpackSolverBase(comm, iodata.solver.linear.sym_fact_type,
                           iodata.solver.linear.strumpack_compression_type,
                           iodata.solver.linear.strumpack_lr_tol,
diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp
index 51febe601..177b078ed 100644
--- a/palace/linalg/superlu.hpp
+++ b/palace/linalg/superlu.hpp
@@ -29,7 +29,7 @@ class SuperLUSolver : public mfem::Solver
 public:
   SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactType reorder, bool use_3d,
                 int print);
-  SuperLUSolver(MPI_Comm comm, const IoData &iodata, int print)
+  SuperLUSolver(const IoData &iodata, MPI_Comm comm, int print)
     : SuperLUSolver(comm, iodata.solver.linear.sym_fact_type,
                     iodata.solver.linear.superlu_3d, print)
   {
   }
diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp
index b25b54583..bd3edc92a 100644
--- a/palace/linalg/vector.cpp
+++ b/palace/linalg/vector.cpp
@@ -520,12 +520,47 @@ void SetSubVector(ComplexVector &x, const mfem::Array &rows, const ComplexV
   mfem::forall_switch(use_dev, N,
                       [=] MFEM_HOST_DEVICE(int i)
                       {
-                        const auto id = idx[i];
+                        const int id = idx[i];
                         XR[id] = YR[id];
                         XI[id] = YI[id];
                       });
 }
 
+template <>
+void SetSubVector(Vector &x, int start, const Vector &y)
+{
+  const bool use_dev = x.UseDevice();
+  const int N = y.Size();
+  MFEM_ASSERT(start >= 0 && start + N <= x.Size(), "Invalid range for SetSubVector!");
+  const auto *Y = y.Read(use_dev);
+  auto *X = x.ReadWrite(use_dev);
+  mfem::forall_switch(use_dev, N,
+                      [=] MFEM_HOST_DEVICE(int i)
+                      {
+                        const int id = start + i;
+                        X[id] = Y[i];
+                      });
+}
+
+template <>
+void SetSubVector(ComplexVector &x, int start, const ComplexVector &y)
+{
+  const bool use_dev = x.UseDevice();
+  const int N = y.Size();
+  MFEM_ASSERT(start >= 0 && start + N <= x.Size(), "Invalid range for SetSubVector!");
+  const auto *YR = y.Real().Read(use_dev);
+  const auto *YI = y.Imag().Read(use_dev);
+  auto *XR = x.Real().ReadWrite(use_dev);
+  auto *XI = x.Imag().ReadWrite(use_dev);
+  mfem::forall_switch(use_dev, N,
+                      [=] MFEM_HOST_DEVICE(int i)
+                      {
+                        const int id = start + i;
+                        XR[id] = YR[i];
+                        XI[id] = YI[i];
+                      });
+}
+
 template <>
 void SetSubVector(Vector &x, int start, int end, double s)
 {
diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp
index 0163b48a9..9c533788b 100644
--- a/palace/linalg/vector.hpp
+++ b/palace/linalg/vector.hpp
@@ -166,6 +166,10 @@ void SetSubVector(VecType &x, const mfem::Array &rows, double s);
 template 
 void SetSubVector(VecType &x, const mfem::Array &rows, const VecType &y);
 
+// Sets contiguous entries from start to the given vector.
+template 
+void SetSubVector(VecType &x, int start, const VecType &y);
+
 // Sets all entries in the range [start, end) to the given value.
 template 
 void SetSubVector(VecType &x, int start, int end, double s);
diff --git a/palace/main.cpp b/palace/main.cpp
index 3e0c671d1..ffaf4c7fa 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -308,7 +308,7 @@ int main(int argc, char *argv[])
   std::vector> mesh;
   {
     std::vector> mfem_mesh;
-    mfem_mesh.push_back(mesh::ReadMesh(world_comm, iodata));
+    mfem_mesh.push_back(mesh::ReadMesh(iodata, world_comm));
     iodata.NondimensionalizeInputs(*mfem_mesh[0]);
     mesh::RefineMesh(iodata, mfem_mesh);
     for (auto &m : mfem_mesh)
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 8c185f64c..fbe34e2f0 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1779,6 +1779,7 @@ void LinearSolverData::SetUp(json &solver)
   // Preconditioner-specific options.
   pc_mat_real = linear->value("PCMatReal", pc_mat_real);
   pc_mat_shifted = linear->value("PCMatShifted", pc_mat_shifted);
+  complex_coarse_solve = linear->value("ComplexCoarseSolve", complex_coarse_solve);
   pc_side_type = linear->value("PCSide", pc_side_type);
   sym_fact_type = linear->value("ColumnOrdering", sym_fact_type);
   strumpack_compression_type =
@@ -1821,6 +1822,7 @@ void LinearSolverData::SetUp(json &solver)
 
   linear->erase("PCMatReal");
   linear->erase("PCMatShifted");
+  linear->erase("ComplexCoarseSolve");
   linear->erase("PCSide");
   linear->erase("ColumnOrdering");
   linear->erase("STRUMPACKCompressionType");
@@ -1865,6 +1867,7 @@ void LinearSolverData::SetUp(json &solver)
 
   std::cout << "PCMatReal: " << pc_mat_real << '\n';
   std::cout << "PCMatShifted: " << pc_mat_shifted << '\n';
+  std::cout << "ComplexCoarseSolve: " << complex_coarse_solve << '\n';
   std::cout << "PCSide: " << pc_side_type << '\n';
   std::cout << "ColumnOrdering: " << sym_fact_type << '\n';
   std::cout << "STRUMPACKCompressionType: " << strumpack_compression_type << '\n';
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index cd088235d..66d01f9d1 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -873,6 +873,10 @@ struct LinearSolverData
   // (makes the preconditoner matrix SPD).
   int pc_mat_shifted = -1;
 
+  // For frequency domain applications, use the complex-valued system matrix in the sparse
+  // direct solver.
+  bool complex_coarse_solve = false;
+
   // Choose left or right preconditioning.
   enum class SideType
   {
diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp
index e7444e6d5..f2829e4b1 100644
--- a/palace/utils/geodata.cpp
+++ b/palace/utils/geodata.cpp
@@ -59,8 +59,8 @@ std::unordered_map CheckMesh(const mfem::Mesh &, const config::Boundar
 
 // Adding boundary elements for material interfaces and exterior boundaries, and "crack"
 // desired internal boundary elements to disconnect the elements on either side.
-int AddInterfaceBdrElements(std::unique_ptr &, std::unordered_map &,
-                            const IoData &);
+int AddInterfaceBdrElements(const IoData &, std::unique_ptr &,
+                            std::unordered_map &);
 
 // Generate element-based mesh partitioning, using either a provided file or METIS.
 std::unique_ptr GetMeshPartitioning(const mfem::Mesh &, int,
@@ -80,7 +80,7 @@ void RebalanceConformalMesh(std::unique_ptr &);
 namespace mesh
 {
 
-std::unique_ptr ReadMesh(MPI_Comm comm, const IoData &iodata)
+std::unique_ptr ReadMesh(const IoData &iodata, MPI_Comm comm)
 {
   // If possible on root, read the serial mesh (converting format if necessary), and do all
   // necessary serial preprocessing. When finished, distribute the mesh to all processes.
@@ -198,7 +198,7 @@ std::unique_ptr ReadMesh(MPI_Comm comm, const IoData &iodata)
     {
       // Split all internal (non periodic) boundary elements for boundary attributes where
       // BC are applied (not just postprocessing).
-      while (AddInterfaceBdrElements(smesh, face_to_be, iodata) != 1)
+      while (AddInterfaceBdrElements(iodata, smesh, face_to_be) != 1)
      {
        // May require multiple calls due to early exit/retry approach.
      }
@@ -485,7 +485,7 @@ void RefineMesh(const IoData &iodata, std::vector
   }
   if (max_region_ref_levels > 0 && mesh.capacity() == 1)
   {
-    RebalanceMesh(mesh[0], iodata);
+    RebalanceMesh(iodata, mesh[0]);
   }
 
   // Prior to MFEM's PR #1046, the tetrahedral mesh required reorientation after all mesh
@@ -1638,7 +1638,7 @@ double GetVolume(const mfem::ParMesh &mesh, const mfem::Array &marker)
   return volume;
 }
 
-double RebalanceMesh(std::unique_ptr &mesh, const IoData &iodata)
+double RebalanceMesh(const IoData &iodata, std::unique_ptr &mesh)
 {
   BlockTimer bt0(Timer::REBALANCE);
   MPI_Comm comm = mesh->GetComm();
@@ -2461,8 +2461,8 @@ struct UnorderedPairHasher
   }
 };
 
-int AddInterfaceBdrElements(std::unique_ptr &orig_mesh,
-                            std::unordered_map &face_to_be, const IoData &iodata)
+int AddInterfaceBdrElements(const IoData &iodata, std::unique_ptr &orig_mesh,
+                            std::unordered_map &face_to_be)
 {
   // Return if nothing to do. Otherwise, count vertices and boundary elements to add.
   if (iodata.boundaries.attributes.empty() && !iodata.model.add_bdr_elements)
diff --git a/palace/utils/geodata.hpp b/palace/utils/geodata.hpp
index 70588fb5a..3df36ede5 100644
--- a/palace/utils/geodata.hpp
+++ b/palace/utils/geodata.hpp
@@ -24,7 +24,7 @@ namespace mesh
 
 // Read and partition a serial mesh from file, returning a pointer to the new parallel mesh
 // object, which should be destroyed by the user.
-std::unique_ptr ReadMesh(MPI_Comm comm, const IoData &iodata);
+std::unique_ptr ReadMesh(const IoData &iodata, MPI_Comm comm);
 
 // Refine the provided mesh according to the data in the input file. If levels of refinement
 // are requested, the refined meshes are stored in order of increased refinement. Ownership
@@ -236,7 +236,7 @@ inline double GetVolume(const mfem::ParMesh &mesh, int attr)
 
 // Helper function responsible for rebalancing the mesh, and optionally writing meshes from
 // the intermediate stages to disk. Returns the imbalance ratio before rebalancing.
-double RebalanceMesh(std::unique_ptr &mesh, const IoData &iodata);
+double RebalanceMesh(const IoData &iodata, std::unique_ptr &mesh);
 
 } // namespace mesh
 
diff --git a/scripts/schema/config/solver.json b/scripts/schema/config/solver.json
index c94d23a0c..2d772ee73 100644
--- a/scripts/schema/config/solver.json
+++ b/scripts/schema/config/solver.json
@@ -118,6 +118,7 @@
         "MGSmoothChebyshev4th": { "type": "boolean" },
         "PCMatReal": { "type": "boolean" },
         "PCMatShifted": { "type": "boolean" },
+        "ComplexCoarseSolve": {"type": "boolean"},
         "PCSide": { "type": "string" },
         "ColumnOrdering": { "type": "string" },
         "STRUMPACKCompressionType": { "type": "string" },
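
For reference, the `complex_matrix` branch added to the `ComplexOperator` specialization of `MfemWrapperSolver::SetOperator` above assembles the standard real-equivalent form of the complex system. Writing the system matrix, solution, and right-hand side as A = Ar + i Ai, x = xr + i xi, and b = br + i bi (all symbols taken from the comments in the diff), the complex solve A x = b corresponds to the real block system

```math
\begin{bmatrix} A_r & -A_i \\ A_i & A_r \end{bmatrix}
\begin{bmatrix} x_r \\ x_i \end{bmatrix}
=
\begin{bmatrix} b_r \\ b_i \end{bmatrix} ,
```

which is what `mfem::HypreParMatrixFromBlocks` builds from `blocks` and `block_coeffs`. The matching `Mult` path detects this case by comparing `pc->Height()` against `x.Size()`: it stacks the real and imaginary parts into a vector of twice the length using the new contiguous-range `linalg::SetSubVector` overload, applies the wrapped real solver once, and splits the result back into `y.Real()` and `y.Imag()`.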
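A minimal configuration fragment for enabling the new option might look like the sketch below. Only `"ComplexCoarseSolve"` itself comes from this change; the `"Solver"`/`"Linear"` nesting follows `config["Solver"]["Linear"]["ComplexCoarseSolve"]` from the changelog entry, and the `"Type"` value shown is just an illustrative choice of sparse direct solver.

```json
{
  "Solver":
  {
    "Linear":
    {
      "Type": "SuperLU",
      "ComplexCoarseSolve": true
    }
  }
}
```

With the default of `false`, the coarse-level sparse direct solve continues to use the real-valued approximation A = Ar + Ai described in `solver.hpp`.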