From fbe004768016ed894f090b5345f911c0ceec7f04 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Fri, 23 May 2025 13:25:04 +0200 Subject: [PATCH 01/74] vbadsa --- include/bounds.hpp | 40 ++++++++++++++++++++++------------- include/matrix_adaptation.hpp | 2 ++ include/settings.hpp | 4 ++-- src/bounds.cpp | 3 +++ src/main.cpp | 15 +++++++++---- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/include/bounds.hpp b/include/bounds.hpp index b822097..82247e8 100644 --- a/include/bounds.hpp +++ b/include/bounds.hpp @@ -15,8 +15,8 @@ namespace bounds { using Mask = Eigen::Array; - Mask is_out_of_bounds(const Vector &xi, const Vector &lb, const Vector &ub); - bool any_out_of_bounds(const Vector &xi, const Vector &lb, const Vector &ub); + Mask is_out_of_bounds(const Vector& xi, const Vector& lb, const Vector& ub); + bool any_out_of_bounds(const Vector& xi, const Vector& lb, const Vector& ub); struct BoundCorrection { @@ -24,17 +24,27 @@ namespace bounds Vector lb, ub, db; Float diameter; size_t n_out_of_bounds = 0; + bool has_bounds; - BoundCorrection(const Vector &lb, const Vector &ub) : lb(lb), ub(ub), db(ub - lb), - diameter((ub - lb).norm()) {} + BoundCorrection(const Vector& lb, const Vector& ub) : lb(lb), ub(ub), db(ub - lb), + diameter((ub - lb).norm()), + has_bounds(true) + { + //! find a better way + if (!std::isfinite(diameter)) + { + diameter = 10; + has_bounds = false; + } + } - void correct(const Eigen::Index i, parameters::Parameters &p); + void correct(const Eigen::Index i, parameters::Parameters& p); - virtual Vector correct_x(const Vector &xi, const Mask &oob) = 0; + virtual Vector correct_x(const Vector& xi, const Mask& oob) = 0; - [[nodiscard]] Mask is_out_of_bounds(const Vector &xi) const; + [[nodiscard]] Mask is_out_of_bounds(const Vector& xi) const; - [[nodiscard]] Vector delta_out_of_bounds(const Vector &xi, const Mask &oob) const; + [[nodiscard]] Vector delta_out_of_bounds(const Vector& xi, const Mask& oob) const; [[nodiscard]] bool any_out_of_bounds() const { @@ -46,7 +56,7 @@ namespace bounds { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector &xi, const Mask &oob) override + Vector correct_x(const Vector& xi, const Mask& oob) override { return xi; } @@ -63,14 +73,14 @@ namespace bounds COTN(Eigen::Ref lb, Eigen::Ref ub) : BoundCorrection(lb, ub), sampler(static_cast(lb.size()), rng::normal(0, 1.0 / 3.)) {} - Vector correct_x(const Vector &xi, const Mask &oob) override; + Vector correct_x(const Vector& xi, const Mask& oob) override; }; struct Mirror final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector &xi, const Mask &oob) override; + Vector correct_x(const Vector& xi, const Mask& oob) override; }; struct UniformResample final : BoundCorrection @@ -79,24 +89,24 @@ namespace bounds UniformResample(Eigen::Ref lb, Eigen::Ref ub) : BoundCorrection(lb, ub), sampler(static_cast(lb.size())) {} - Vector correct_x(const Vector &xi, const Mask &oob) override; + Vector correct_x(const Vector& xi, const Mask& oob) override; }; struct Saturate final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector &xi, const Mask &oob) override; + Vector correct_x(const Vector& xi, const Mask& oob) override; }; struct Toroidal final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector &xi, const Mask &oob) override; + Vector correct_x(const Vector& xi, const Mask& oob) override; }; - inline std::shared_ptr get(const 
parameters::CorrectionMethod &m, const Vector &lb, const Vector &ub) + inline std::shared_ptr get(const parameters::CorrectionMethod& m, const Vector& lb, const Vector& ub) { using namespace parameters; switch (m) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 778acac..a7676cb 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -107,6 +107,8 @@ namespace matrix_adaptation }; + + struct OnePlusOneAdaptation: CovarianceAdaptation { constexpr static Float max_success_ratio = 0.44; diff --git a/include/settings.hpp b/include/settings.hpp index ae475f4..a93a1ae 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -52,8 +52,8 @@ namespace parameters lambda0(lambda.value_or(4 + std::floor(3 * std::log(dim)))), mu0(mu.value_or(lambda0 / 2)), x0(x0), - lb(lb.value_or(Vector::Ones(dim) * -5)), - ub(ub.value_or(Vector::Ones(dim) * 5)), + lb(lb.value_or(Vector::Ones(dim) * -std::numeric_limits::infinity())), + ub(ub.value_or(Vector::Ones(dim) * std::numeric_limits::infinity())), cs(cs), cc(cc), cmu(cmu), diff --git a/src/bounds.cpp b/src/bounds.cpp index 449ab5d..d5d8340 100644 --- a/src/bounds.cpp +++ b/src/bounds.cpp @@ -33,6 +33,9 @@ namespace bounds void BoundCorrection::correct(const Eigen::Index i, parameters::Parameters& p) { + if (!has_bounds) + return; + const auto oob = is_out_of_bounds(p.pop.X.col(i)); if (oob.any()) { diff --git a/src/main.cpp b/src/main.cpp index 1cf686b..47a0f43 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -47,15 +47,22 @@ struct Timer int main() { rng::set_seed(42); - const size_t dim = 10; + const size_t dim = 100; + const size_t budget = dim * 1000; + parameters::Modules m; - parameters::Settings settings(dim, m, 1e-8, std::nullopt, 1000 * dim, 2.0, 1); + //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; + m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; + m.bound_correction = parameters::CorrectionMethod::NONE; + + parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), + std::nullopt, budget, 2.0); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); + Timer t; FunctionType f = Function(); - while (cma.step(f)) { //std::cout << cma.p->stats << std::endl; @@ -63,7 +70,7 @@ int main() //auto sr = std::dynamic_pointer_cast(cma.p->mutation); //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; } - std::cout << cma.p->stats << std::endl; + std::cout << cma.p->stats.evaluations << std::endl; } From e0c3b85242a209e4cdf71e8295f4855b12efdf9e Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Fri, 23 May 2025 17:04:01 +0200 Subject: [PATCH 02/74] asa --- include/matrix_adaptation.hpp | 17 +++++------ include/repelling.hpp | 2 +- include/stats.hpp | 2 ++ include/weights.hpp | 2 ++ src/interface.cpp | 6 ++-- src/main.cpp | 5 ++-- src/matrix_adaptation.cpp | 56 +++++++++++++++++------------------ src/mutation.cpp | 6 ++-- src/repelling.cpp | 16 +++++----- src/weights.cpp | 3 ++ 10 files changed, 61 insertions(+), 54 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index a7676cb..28314bc 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -13,13 +13,11 @@ namespace matrix_adaptation Vector m, m_old, dm, ps; Float dd; Float expected_length_z; - Matrix inv_C; Adaptation(const size_t dim, const Vector& x0, const Vector& ps, const Float expected_length_z) : m(x0), m_old(dim), dm(Vector::Zero(dim)), ps(ps), dd(static_cast(dim)), 
- expected_length_z(expected_length_z), - inv_C(Matrix::Identity(dim, dim)) + expected_length_z(expected_length_z) { } @@ -28,7 +26,7 @@ namespace matrix_adaptation const parameters::Stats& stats, size_t mu, size_t lambda) = 0; virtual bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, - size_t mu, const parameters::Settings& settings, const parameters::Stats& stats) = 0; + size_t mu, const parameters::Settings& settings, parameters::Stats& stats) = 0; virtual void restart(const parameters::Settings& settings) = 0; @@ -47,7 +45,7 @@ namespace matrix_adaptation } bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, - const size_t mu, const parameters::Settings& settings, const parameters::Stats& stats) override + const size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override { return true; } @@ -70,6 +68,7 @@ namespace matrix_adaptation Matrix inv_root_C; bool hs = true; + CovarianceAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), pc(Vector::Zero(dim)), @@ -90,7 +89,7 @@ namespace matrix_adaptation size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, - const parameters::Settings& settings, const parameters::Stats& stats) override; + const parameters::Settings& settings, parameters::Stats& stats) override; void restart(const parameters::Settings& settings) override; @@ -107,8 +106,6 @@ namespace matrix_adaptation }; - - struct OnePlusOneAdaptation: CovarianceAdaptation { constexpr static Float max_success_ratio = 0.44; @@ -120,7 +117,7 @@ namespace matrix_adaptation size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, - const parameters::Settings& settings, const parameters::Stats& stats) override; + const parameters::Settings& settings, parameters::Stats& stats) override; }; @@ -141,7 +138,7 @@ namespace matrix_adaptation size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, - const parameters::Settings& settings, const parameters::Stats& stats) override; + const parameters::Settings& settings, parameters::Stats& stats) override; void restart(const parameters::Settings& settings) override; diff --git a/include/repelling.hpp b/include/repelling.hpp index c8cdbb7..0003393 100644 --- a/include/repelling.hpp +++ b/include/repelling.hpp @@ -72,7 +72,7 @@ namespace repelling int attempts = 0; Float coverage = 20.0; // Matrix C; - // Matrix C_inv; + Matrix C_inv; virtual ~Repelling() = default; diff --git a/include/stats.hpp b/include/stats.hpp index 9aa108d..0e2855b 100644 --- a/include/stats.hpp +++ b/include/stats.hpp @@ -17,6 +17,8 @@ namespace parameters bool has_improved = false; Float success_ratio = 2.0 / 11.0; Float cs = 1.0 / 12.0; + size_t last_update = 0; + size_t n_updates = 0; void update_best(const Vector &x, const Float y) { diff --git a/include/weights.hpp b/include/weights.hpp index 17ed571..1b33f47 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -13,6 +13,8 @@ namespace parameters Float mueff, mueff_neg; Float c1, cmu, cc; + Float lazy_update_interval; + Float sigma_path_scale; Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings); diff 
--git a/src/interface.cpp b/src/interface.cpp index 9f138a3..7aad6a1 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -303,7 +303,8 @@ void define_repelling(py::module &main) .def("prepare_sampling", &Repelling::prepare_sampling, py::arg("p")) .def_readwrite("archive", &Repelling::archive) .def_readwrite("coverage", &Repelling::coverage) - .def_readwrite("attempts", &Repelling::attempts); + .def_readwrite("attempts", &Repelling::attempts) + .def_readwrite("C_inv", &Repelling::C_inv); py::class_>(m, "NoRepelling") .def(py::init<>()); @@ -326,7 +327,6 @@ void define_matrix_adaptation(py::module &main) .def_readwrite("ps", &Adaptation::ps) .def_readwrite("dd", &Adaptation::dd) .def_readwrite("expected_length_z", &Adaptation::expected_length_z) - .def_readwrite("inv_C", &CovarianceAdaptation::inv_C) .def("adapt_evolution_paths", &Adaptation::adapt_evolution_paths, py::arg("pop"), py::arg("weights"), @@ -518,6 +518,8 @@ void define_parameters(py::module &main) .def_readwrite("global_best", &Stats::global_best) .def_readwrite("has_improved", &Stats::has_improved) .def_readwrite("success_ratio", &Stats::success_ratio) + .def_readwrite("last_update", &Stats::last_update) + .def_readwrite("n_updates", &Stats::n_updates) .def("__repr__", [](Stats &stats) { std::stringstream ss; diff --git a/src/main.cpp b/src/main.cpp index 47a0f43..af67ff8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -71,6 +71,7 @@ int main() //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; } std::cout << cma.p->stats.evaluations << std::endl; - - + std::cout << cma.p->stats.t << std::endl; + std::cout << cma.p->stats.n_updates << std::endl; + std::cout << cma.p->stats << std::endl; } diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index ef90444..86fec0e 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -18,6 +18,7 @@ namespace matrix_adaptation const Float actual_ps_length = ps.norm() / sqrt( 1.0 - pow(1.0 - mutation->cs, 2.0 * (stats.evaluations / lambda))); + const Float expected_ps_length = (1.4 + (2.0 / (dd + 1.0))) * expected_length_z; hs = actual_ps_length < expected_ps_length; @@ -31,22 +32,18 @@ namespace matrix_adaptation const auto dhs = (1 - hs) * w.cc * (2.0 - w.cc); const auto old_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * w.positive.sum())) * C; - Matrix rank_mu; if (m.active) { auto weights = w.weights.topRows(pop.Y.cols()); - rank_mu = w.cmu * ((pop.Y.array().rowwise() * weights.array().transpose()).matrix() * pop.Y.transpose()); + C = old_c + rank_one + w.cmu * ((pop.Y.array().rowwise() * weights.array().transpose()).matrix() * pop.Y.transpose()); } else { - rank_mu = w.cmu * ((pop.Y.leftCols(mu).array().rowwise() * w.positive.array().transpose()).matrix() * pop.Y. - leftCols(mu).transpose()); + C = old_c + rank_one + (w.cmu * ((pop.Y.leftCols(mu).array().rowwise() * w.positive.array().transpose()).matrix() * pop.Y. 
+ leftCols(mu).transpose())); } - C = old_c + rank_one + rank_mu; - - C = C.triangularView().toDenseMatrix() + - C.triangularView().toDenseMatrix().transpose(); + C = 0.5 * (C + C.transpose().eval()); } bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) @@ -72,17 +69,26 @@ namespace matrix_adaptation } return false; } - inv_C = ((B * d.cwiseInverse().asDiagonal()) * B.transpose()); + + d = d.cwiseSqrt(); - inv_root_C = (B * d.cwiseInverse().asDiagonal()) * B.transpose(); + inv_root_C = B * d.cwiseInverse().asDiagonal() * B.transpose(); return true; } bool CovarianceAdaptation::adapt_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu, - const Settings& settings, const parameters::Stats& stats) + const Settings& settings, parameters::Stats& stats) { - adapt_covariance_matrix(w, m, pop, mu); - return perform_eigendecomposition(settings); + + if (static_cast(stats.t) >= static_cast(stats.last_update) + w.lazy_update_interval) + { + stats.last_update = stats.t; + stats.n_updates++; + adapt_covariance_matrix(w, m, pop, mu); + return perform_eigendecomposition(settings); + } + return true; + } void CovarianceAdaptation::restart(const Settings& settings) @@ -90,7 +96,6 @@ namespace matrix_adaptation B = Matrix::Identity(settings.dim, settings.dim); C = Matrix::Identity(settings.dim, settings.dim); inv_root_C = Matrix::Identity(settings.dim, settings.dim); - inv_C = Matrix::Identity(settings.dim, settings.dim); d.setOnes(); m = settings.x0.value_or(Vector::Zero(settings.dim)); m_old.setZero(); @@ -101,12 +106,12 @@ namespace matrix_adaptation Vector CovarianceAdaptation::compute_y(const Vector& zi) { - return B * (d.asDiagonal() * zi); + return B * d.cwiseProduct(zi); } Vector CovarianceAdaptation::invert_y(const Vector& yi) { - return d.cwiseInverse().asDiagonal() * (B.transpose() * yi); + return (B.transpose() * yi).cwiseQuotient(d); } bool SeperableAdaptation::perform_eigendecomposition(const Settings& settings) @@ -131,7 +136,7 @@ namespace matrix_adaptation } bool OnePlusOneAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, - const parameters::Settings& settings, const parameters::Stats& stats) + const parameters::Settings& settings, parameters::Stats& stats) { if (!stats.has_improved) { @@ -142,7 +147,6 @@ namespace matrix_adaptation - void MatrixAdaptation::adapt_evolution_paths(const Population& pop, const Weights& w, const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) @@ -155,7 +159,7 @@ namespace matrix_adaptation } bool MatrixAdaptation::adapt_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu, - const Settings& settings, const parameters::Stats& stats) + const Settings& settings, parameters::Stats& stats) { const auto old_m = (1. - (0.5 * w.c1) - (0.5 * w.cmu)) * M; const auto scaled_ps = (0.5 * w.c1) * (M * ps) * ps.transpose(); @@ -163,25 +167,21 @@ namespace matrix_adaptation const auto old_m_inv = (1. 
+ (0.5 * w.c1) + (0.5 * w.cmu)) * M_inv; const auto scaled_inv_ps = (0.5 * w.c1) * ps * (ps.transpose() * M); - Matrix new_m, new_m_inv; if (m.active) { - // TODO: Check if we can do this like this const auto scaled_weights = ((0.5 * w.cmu) * w.weights.topRows(pop.Y.cols())).array().transpose(); const auto scaled_y = (pop.Y.array().rowwise() * scaled_weights).matrix(); - new_m = scaled_y * pop.Z.transpose(); - new_m_inv = scaled_y * (pop.Z.transpose() * M_inv); + + M = old_m + scaled_ps + scaled_y * pop.Z.transpose(); + M_inv = old_m_inv - scaled_inv_ps - scaled_y * (pop.Z.transpose() * M_inv); } else { const auto scaled_weights = ((0.5 * w.cmu) * w.positive).array().transpose(); const auto scaled_y = (pop.Y.leftCols(mu).array().rowwise() * scaled_weights).matrix(); - new_m = scaled_y * pop.Z.leftCols(mu).transpose(); - new_m_inv = scaled_y * (pop.Z.leftCols(mu).transpose() * M_inv); + M = old_m + scaled_ps + scaled_y * pop.Z.leftCols(mu).transpose(); + M_inv = old_m_inv - scaled_inv_ps - scaled_y * (pop.Z.leftCols(mu).transpose() * M_inv); } - - M = old_m + scaled_ps + new_m; - M_inv = old_m_inv - scaled_inv_ps - new_m_inv; return true; } diff --git a/src/mutation.cpp b/src/mutation.cpp index 693d60a..0fbb157 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -38,9 +38,9 @@ namespace mutation size_t n_rej = 0; do { - p.pop.Z.col(i) = p.mutation->tc->scale((*p.sampler)(), p.bounds->diameter, p.settings.budget, p.stats.evaluations); - p.pop.Y.col(i) = p.adaptation->compute_y(p.pop.Z.col(i)); - p.pop.X.col(i) = p.pop.Y.col(i) * p.pop.s(i) + p.adaptation->m; + p.pop.Z.col(i).noalias() = p.mutation->tc->scale((*p.sampler)(), p.bounds->diameter, p.settings.budget, p.stats.evaluations); + p.pop.Y.col(i).noalias() = p.adaptation->compute_y(p.pop.Z.col(i)); + p.pop.X.col(i).noalias() = p.pop.Y.col(i) * p.pop.s(i) + p.adaptation->m; p.bounds->correct(i, p); } while ( (p.settings.modules.bound_correction == parameters::CorrectionMethod::RESAMPLE && n_rej++ < 5*p.settings.dim && p.bounds->is_out_of_bounds(p.pop.X.col(i)).any()) || p.repelling->is_rejected(p.pop.X.col(i), p)); diff --git a/src/repelling.cpp b/src/repelling.cpp index 9da3ec2..27bf3f4 100644 --- a/src/repelling.cpp +++ b/src/repelling.cpp @@ -66,7 +66,7 @@ namespace repelling bool TabooPoint::rejects(const Vector &xi, const parameters::Parameters &p, const int attempts) const { const Float rejection_radius = std::pow(shrinkage, attempts) * radius; - const Float delta_xi = distance::mahanolobis(xi, solution.x, p.adaptation->inv_C) / p.mutation->sigma; + const Float delta_xi = distance::mahanolobis(xi, solution.x, p.repelling->C_inv) / p.mutation->sigma; if (delta_xi < rejection_radius) return true; @@ -81,7 +81,7 @@ namespace repelling void TabooPoint::calculate_criticality(const parameters::Parameters &p) { - const Float delta_m = distance::mahanolobis(p.adaptation->m, solution.x, p.adaptation->inv_C) / p.mutation->sigma; + const Float delta_m = distance::mahanolobis(p.adaptation->m, solution.x, p.repelling->C_inv) / p.mutation->sigma; const auto u = delta_m + radius; const auto l = delta_m - radius; criticality = cdf(u) - cdf(l); @@ -97,10 +97,8 @@ namespace repelling { return a.criticality > b.criticality; }); //! 
If it is not intialized - /* - if (C.cols() != p.settings.dim) + if (C_inv.cols() != p.settings.dim) { - C = Matrix::Identity(p.settings.dim, p.settings.dim); C_inv = Matrix::Identity(p.settings.dim, p.settings.dim); } @@ -110,14 +108,16 @@ namespace repelling using namespace matrix_adaptation; const auto dynamic = std::dynamic_pointer_cast(p.adaptation); - const Float d_sigma = p.mutation->sigma / p.settings.sigma0; + C_inv.noalias() = (dynamic->B * dynamic->d.cwiseInverse().asDiagonal()) * dynamic->B.transpose(); + + /*const Float d_sigma = p.mutation->sigma / p.settings.sigma0; if (d_sigma > constants::sigma_threshold) { C = dynamic->C / dynamic->C.maxCoeff(); C_inv = dynamic->inv_C / dynamic->inv_C.maxCoeff(); - } + }*/ } - */ + } void Repelling::update_archive(FunctionType &objective, parameters::Parameters &p) diff --git a/src/weights.cpp b/src/weights.cpp index e6accd5..97f1d27 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -49,6 +49,9 @@ namespace parameters negative *= (neg_scaler / negative.cwiseAbs().sum()); weights << positive, negative; + + lazy_update_interval = 1.0 / (c1 + cmu + 1e-23) / d / 10.0; + std::cout << lazy_update_interval << std::endl; } void Weights::weights_default(const size_t lambda) From cf9a9aa11bc26d125024b9132c075ecd424d0faf Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Thu, 29 May 2025 11:32:02 +0200 Subject: [PATCH 03/74] fix criteria --- include/restart_criteria.hpp | 126 +++++++++++++++++---------------- scripts/repelling/repelling.py | 122 +++++++++++++++++++++++++------ src/c_maes.cpp | 2 +- src/interface.cpp | 8 ++- src/main.cpp | 56 +++++++-------- src/mutation.cpp | 1 + src/parameters.cpp | 2 +- src/repelling.cpp | 4 +- src/restart_criteria.cpp | 4 +- src/weights.cpp | 1 - 10 files changed, 208 insertions(+), 118 deletions(-) diff --git a/include/restart_criteria.hpp b/include/restart_criteria.hpp index 2d84462..6c1f1fb 100644 --- a/include/restart_criteria.hpp +++ b/include/restart_criteria.hpp @@ -5,158 +5,162 @@ namespace parameters { - struct Parameters; + struct Parameters; } namespace restart { - struct Criterion { + struct Criterion + { bool met; std::string name; size_t last_restart; - Criterion(const std::string& name): met(false), name(name) {} + Criterion(const std::string &name) : met(false), name(name) {} virtual ~Criterion() = default; - void reset(const parameters::Parameters &p); + void reset(const parameters::Parameters &p); virtual void update(const parameters::Parameters &p) = 0; - virtual void on_reset(const parameters::Parameters &p){}; + virtual void on_reset(const parameters::Parameters &p) {}; }; using vCriteria = std::vector>; - - struct Criteria { - Criteria(const vCriteria& c): items(c){} - - void update(const parameters::Parameters &p) - { - any = false; - for (const auto& c: items) - { - c->update(p); - any = any or c->met; - } - } - - void reset(const parameters::Parameters &p) - { - for (const auto& c: items) - c->reset(p); - } - - vCriteria items; - bool any; + + struct Criteria + { + Criteria(const vCriteria &c) : items(c) {} + + void update(const parameters::Parameters &p) + { + for (const auto &c : items) + { + c->update(p); + } + } + + void reset(const parameters::Parameters &p) + { + for (const auto &c : items) + c->reset(p); + } + + bool any() const + { + for (const auto &c : items) + if (c->met) + return true; + return false; + } + + vCriteria items; static Criteria get(const parameters::Modules modules); - }; + }; - - struct ExceededMaxIter: Criterion + struct ExceededMaxIter : Criterion { size_t 
max_iter; - ExceededMaxIter(): Criterion("ExceededMaxIter"){} + ExceededMaxIter() : Criterion("ExceededMaxIter") {} void update(const parameters::Parameters &p) override; void on_reset(const parameters::Parameters &p) override; }; - struct NoImprovement: Criterion + struct NoImprovement : Criterion { size_t n_bin; std::vector best_fitnesses; - NoImprovement(): Criterion("NoImprovement"){} + NoImprovement() : Criterion("NoImprovement") {} void update(const parameters::Parameters &p) override; void on_reset(const parameters::Parameters &p) override; }; - struct MaxSigma: Criterion + struct MaxSigma : Criterion { static inline Float tolerance = 1e4; - MaxSigma(): Criterion("MaxSigma"){} + MaxSigma() : Criterion("MaxSigma") {} void update(const parameters::Parameters &p) override; }; - struct MinSigma: Criterion + struct MinSigma : Criterion { static inline Float tolerance = 1e-20; - MinSigma(): Criterion("MinSigma"){} + MinSigma() : Criterion("MinSigma") {} void update(const parameters::Parameters &p) override; }; - struct UnableToAdapt: Criterion + struct UnableToAdapt : Criterion { - UnableToAdapt(): Criterion("UnableToAdapt"){} + UnableToAdapt() : Criterion("UnableToAdapt") {} void update(const parameters::Parameters &p) override; }; - struct FlatFitness: Criterion + struct FlatFitness : Criterion { size_t max_flat_fitness; size_t flat_fitness_index; Eigen::Array flat_fitnesses; - - FlatFitness(): Criterion("FlatFitness"){} + + FlatFitness() : Criterion("FlatFitness") {} void update(const parameters::Parameters &p) override; void on_reset(const parameters::Parameters &p) override; }; - struct TolX: Criterion + struct TolX : Criterion { static inline Float tolerance = 10e-12; Vector tolx_vector; - TolX(): Criterion("TolX"){} + TolX() : Criterion("TolX") {} void update(const parameters::Parameters &p) override; void on_reset(const parameters::Parameters &p) override; }; - - struct MaxDSigma: Criterion + struct MaxDSigma : Criterion { static inline Float tolerance = std::pow(10., 20.); - MaxDSigma(): Criterion("MaxDSigma"){} + MaxDSigma() : Criterion("MaxDSigma") {} void update(const parameters::Parameters &p) override; }; - struct MinDSigma: Criterion + struct MinDSigma : Criterion { static inline Float tolerance = 1e-8; - MinDSigma(): Criterion("MinDSigma"){} + MinDSigma() : Criterion("MinDSigma") {} void update(const parameters::Parameters &p) override; }; - - struct ConditionC: Criterion + struct ConditionC : Criterion { static inline Float tolerance = std::pow(10., 14.); - ConditionC(): Criterion("ConditionC"){} + ConditionC() : Criterion("ConditionC") {} void update(const parameters::Parameters &p) override; }; - struct NoEffectAxis: Criterion + struct NoEffectAxis : Criterion { static inline Float tolerance = 0.; - NoEffectAxis(): Criterion("NoEffectAxis"){} + NoEffectAxis() : Criterion("NoEffectAxis") {} void update(const parameters::Parameters &p) override; }; - struct NoEffectCoord: Criterion + struct NoEffectCoord : Criterion { static inline Float tolerance = 0.; - NoEffectCoord(): Criterion("NoEffectCoord"){} + NoEffectCoord() : Criterion("NoEffectCoord") {} void update(const parameters::Parameters &p) override; }; - struct Stagnation: Criterion + struct Stagnation : Criterion { static inline Float tolerance = 0.3; - + size_t n_stagnation; std::vector median_fitnesses; - std::vector best_fitnesses; - Stagnation(): Criterion("Stagnation"){} + std::vector best_fitnesses; + Stagnation() : Criterion("Stagnation") {} void update(const parameters::Parameters &p) override; void 
on_reset(const parameters::Parameters &p) override; - }; + }; } \ No newline at end of file diff --git a/scripts/repelling/repelling.py b/scripts/repelling/repelling.py index d5c53ef..5a4528a 100644 --- a/scripts/repelling/repelling.py +++ b/scripts/repelling/repelling.py @@ -13,6 +13,7 @@ import pandas as pd import modcma.c_maes as c_cmaes from modcma.c_maes.cmaescpp.parameters import Solution +from scipy.spatial.distance import mahalanobis base_dir = os.path.realpath(os.path.dirname(__file__)) @@ -148,13 +149,11 @@ def plot( ax.scatter(X[0, :], X[1, :], color=main_color, alpha=0.5) for t, tabu_point in enumerate(cma.p.repelling.archive, 1): - if c_cmaes.constants.repelling_current_cov: - Ct = C - else: - Ct = tabu_point.C + Ct = C theta_t = np.degrees(np.arctan2(Ct[1, 0], Ct[0, 0])) - + + # print(theta_t, np.degrees(np.arctan2(tabu_point.C[1, 0], tabu_point.C[0, 0]))) current = Ellipse( tabu_point.solution.x, @@ -416,7 +415,71 @@ def calc_taboo_potential(fid=3, instance=6, dim=2, n_trials=1): problem.reset() -def interactive(fid=21, instance=6, dim=2, rep=True, coverage=5, save_frames = True): +class CloseToTaboo(c_cmaes.restart.Criterion): + + def __init__(self): + super().__init__("CloseToTaboo") + + def update(self, par: c_cmaes.Parameters): + self.met = False + if len(par.repelling.archive) != 0: + d_sigma = par.mutation.sigma / par.settings.sigma0 + somewhat_converged = d_sigma < 1e-1 + + if somewhat_converged: + for p in par.repelling.archive: + distance = mahalanobis(par.adaptation.m, p.solution.x, par.repelling.C_inv) + threshold = 1 / np.sqrt(par.settings.dim) + if distance < threshold: + print("close to taboo", distance, threshold) + self.met = True + + + def on_update(self, par: c_cmaes.Parameters): + self.met = False + +class TooMuchRepelling(c_cmaes.restart.Criterion): + + def __init__(self): + super().__init__("TooMuchRepelling") + self.decay = 0 + self.alpha = 0.9 + + def update(self, par: c_cmaes.Parameters): + self.met = False + self.decay = (1 - self.alpha) * self.decay + (self.alpha * par.repelling.attempts) + if self.decay > (2 * par.lamb): + self.met = True + + def on_update(self, par: c_cmaes.Parameters): + self.met = False + self.decay = 0 + +class ConvergingToBadBasin(c_cmaes.restart.Criterion): + + def __init__(self): + super().__init__("ConvergingToBadBasin") + + def update(self, par: c_cmaes.Parameters): + self.met = False + if len(par.repelling.archive) != 0: + d_sigma = par.mutation.sigma / par.settings.sigma0 + somewhat_converged = d_sigma < 1e-1 + + if somewhat_converged: + function_values = np.array([p.solution.y for p in par.repelling.archive]) + threshold_value = np.median(function_values) + if threshold_value < par.pop.f.min(): + print("Bad basin", threshold_value, "vs", par.pop.f.min()) + self.met = True + + def on_update(self, par: c_cmaes.Parameters): + self.met = False + + + + +def interactive(fid=21, instance=6, dim=2, rep=True, coverage=5, save_frames = False): lb = -5 ub = 5 @@ -434,7 +497,6 @@ def interactive(fid=21, instance=6, dim=2, rep=True, coverage=5, save_frames = T # c_cmaes.constants.sigma_threshold = 0.25 # c_cmaes.constants.tol_min_sigma = 0.01 - c_cmaes.constants.repelling_current_cov = True modules = c_cmaes.parameters.Modules() modules.restart_strategy = c_cmaes.options.RESTART modules.bound_correction = c_cmaes.options.SATURATE @@ -447,36 +509,56 @@ def interactive(fid=21, instance=6, dim=2, rep=True, coverage=5, save_frames = T sigma0=2.0, budget=10_000 * dim, target=problem.optimum.y + 1e-8, + lb=np.ones(dim) * -5, + 
ub=np.ones(dim) * 5, ) parameters = c_cmaes.Parameters(settings) parameters.repelling.coverage = coverage cma = c_cmaes.ModularCMAES(parameters) + + c1 = CloseToTaboo() + c2 = TooMuchRepelling() + c3 = ConvergingToBadBasin() + cma.p.criteria.items = cma.p.criteria.items + [c1, c2, c3] archive_size = 0 while not cma.break_conditions(): + # if any(x.met for x in cma.p.criteria.items): + # breakpoint() + + # print("before start", cma.p.criteria.items) + # print(cma.p.repelling.archive) + # print(cma.p.stats.solutions) + cma.p.start(problem) + # print() + # print("after start", cma.p.criteria.items) + # print(cma.p.repelling.archive) + # print(cma.p.stats.solutions) cma.mutate(problem) + # print() + # print("after mutate", cma.p.criteria.items) + # print(cma.p.repelling.archive) + # print(cma.p.stats.solutions) + if dim == 2: plot(cma, X, Y, Z, lb, ub, problem) - if save_frames: - plt.savefig(os.path.join( - base_dir, - f"figures/interactive/f{fid}i{instance}r{rep}{cma.p.stats.t:03d}.png" - )) - - if len(cma.p.repelling.archive) != archive_size: - archive_size = len(cma.p.repelling.archive) - for p in cma.p.repelling.archive: - print(f"({p.radius:.2e}, {p.criticality: .2e})", end=", ") - print() + + # if len(cma.p.repelling.archive) != archive_size: + # archive_size = len(cma.p.repelling.archive) + # for p in cma.p.repelling.archive: + # print(f"({p.radius:.2e}, {p.criticality: .2e})", end=", ") + # print() # breakpoint() # time.sleep(1) cma.select() cma.recombine() - cma.adapt(problem) + cma.adapt() + + print(problem.optimum) - print(cma.p.stats.solutions) + print(len(cma.p.stats.solutions)) # breakpoint() final_target = problem.state.current_best.y - problem.optimum.y print("final target: ", final_target, "used budget: ", problem.state.evaluations) diff --git a/src/c_maes.cpp b/src/c_maes.cpp index 8244ead..1a363ce 100644 --- a/src/c_maes.cpp +++ b/src/c_maes.cpp @@ -49,6 +49,6 @@ bool ModularCMAES::break_conditions() const const auto budget_used_up = p->stats.evaluations >= p->settings.budget; const auto exceed_gens = p->settings.max_generations and p->stats.t >= p->settings.max_generations; const auto restart_strategy_criteria = p->settings.modules.restart_strategy == parameters::RestartStrategyType::STOP - and p->criteria.any; + and p->criteria.any(); return exceed_gens or target_reached or budget_used_up or restart_strategy_criteria; } diff --git a/src/interface.cpp b/src/interface.cpp index 7aad6a1..19bb299 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -294,7 +294,11 @@ void define_repelling(py::module &main) .def_readwrite("n_rep", &TabooPoint::n_rep) .def_readwrite("solution", &TabooPoint::solution) .def_readwrite("shrinkage", &TabooPoint::shrinkage) - .def_readwrite("criticality", &TabooPoint::criticality); + .def_readwrite("criticality", &TabooPoint::criticality) + .def("__repr__", [](TabooPoint &tb) { + return ""; + }); py::class_>(m, "Repelling") .def(py::init<>()) @@ -1016,7 +1020,7 @@ void define_restart_criteria(py::module &main) .def_readwrite("items", &Criteria::items) .def("reset", &Criteria::reset, py::arg("parameters")) .def("update", &Criteria::update, py::arg("parameters")) - .def_readonly("any", &Criteria::any); + .def("any", &Criteria::any); } diff --git a/src/main.cpp b/src/main.cpp index af67ff8..3d3fd4a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,7 +32,7 @@ void call(Callable& o) struct Timer { - std::chrono::time_point t1; + std::chrono::time_point t1; Timer() : t1(high_resolution_clock::now()) {} ~Timer() @@ -44,34 +44,34 @@ struct Timer }; 
-int main() -{ - rng::set_seed(42); - const size_t dim = 100; - const size_t budget = dim * 1000; +// int main() +// { +// rng::set_seed(42); +// const size_t dim = 100; +// const size_t budget = dim * 1000; - parameters::Modules m; - //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; - m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; - m.bound_correction = parameters::CorrectionMethod::NONE; +// parameters::Modules m; +// //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; +// m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; +// m.bound_correction = parameters::CorrectionMethod::NONE; - parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), - std::nullopt, budget, 2.0); - auto p = std::make_shared(settings); +// parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), +// std::nullopt, budget, 2.0); +// auto p = std::make_shared(settings); - auto cma = ModularCMAES(p); +// auto cma = ModularCMAES(p); - Timer t; - FunctionType f = Function(); - while (cma.step(f)) - { - //std::cout << cma.p->stats << std::endl; - //std::cout << cma.p->mutation->sigma << std::endl; - //auto sr = std::dynamic_pointer_cast(cma.p->mutation); - //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; - } - std::cout << cma.p->stats.evaluations << std::endl; - std::cout << cma.p->stats.t << std::endl; - std::cout << cma.p->stats.n_updates << std::endl; - std::cout << cma.p->stats << std::endl; -} +// Timer t; +// FunctionType f = Function(); +// while (cma.step(f)) +// { +// //std::cout << cma.p->stats << std::endl; +// //std::cout << cma.p->mutation->sigma << std::endl; +// //auto sr = std::dynamic_pointer_cast(cma.p->mutation); +// //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; +// } +// std::cout << cma.p->stats.evaluations << std::endl; +// std::cout << cma.p->stats.t << std::endl; +// std::cout << cma.p->stats.n_updates << std::endl; +// std::cout << cma.p->stats << std::endl; +// } diff --git a/src/mutation.cpp b/src/mutation.cpp index 0fbb157..055b668 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -33,6 +33,7 @@ namespace mutation ss->sample(sigma, p.pop); p.bounds->n_out_of_bounds = 0; p.repelling->prepare_sampling(p); + for (Eigen::Index i = 0; i < static_cast(n_offspring); ++i) { size_t n_rej = 0; diff --git a/src/parameters.cpp b/src/parameters.cpp index 832308a..e89adb1 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -80,7 +80,7 @@ namespace parameters void Parameters::start(FunctionType &objective) { old_pop = pop; - if (criteria.any) + if (criteria.any()) { const auto sig = restart_strategy->update(*this); perform_restart(objective, sig); diff --git a/src/repelling.cpp b/src/repelling.cpp index 27bf3f4..ea6e5a9 100644 --- a/src/repelling.cpp +++ b/src/repelling.cpp @@ -143,11 +143,12 @@ namespace repelling if (accept_candidate) archive.emplace_back(candidate_point, 1.0);// , C, C_inv); + const Float volume_per_n = p.settings.volume / (p.settings.sigma0 * coverage * p.stats.solutions.size()); const Float n = p.adaptation->dd; const Float gamma_f = std::pow(std::tgamma(n / 2.0 + 1.0), 1.0 / n) / std::sqrt(M_PI); for (auto &point : archive) - point.radius = std::pow(volume_per_n * point.n_rep, 1.0 / n) * gamma_f; + point.radius = (std::pow(volume_per_n * point.n_rep, 1.0 / n) * gamma_f) / std::sqrt(n); } bool Repelling::is_rejected(const Vector &xi, parameters::Parameters &p) @@ -168,7 
+169,6 @@ namespace repelling } } } - return false; } } diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index b86fb89..9d9d305 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -154,7 +154,7 @@ namespace restart { const Eigen::Index t = p.stats.t % p.settings.dim; const auto effect_axis = 0.1 * p.mutation->sigma * std::sqrt(dynamic->d(t)) * dynamic->B.col(t); - met = (effect_axis.array() < tolerance).all(); + met = (effect_axis.array().abs() < tolerance).all(); } } @@ -163,7 +163,7 @@ namespace restart if (const auto dynamic = std::dynamic_pointer_cast(p.adaptation)) { const auto effect_coord = 0.2 * p.mutation->sigma * dynamic->C.diagonal().cwiseSqrt(); - met = (effect_coord.array() < tolerance).all(); + met = (effect_coord.array().abs() < tolerance).all(); } } diff --git a/src/weights.cpp b/src/weights.cpp index 97f1d27..c194ab0 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -51,7 +51,6 @@ namespace parameters weights << positive, negative; lazy_update_interval = 1.0 / (c1 + cmu + 1e-23) / d / 10.0; - std::cout << lazy_update_interval << std::endl; } void Weights::weights_default(const size_t lambda) From 6a8c70d459bccaae6a8cb129add7fbd235e19cc2 Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Thu, 29 May 2025 11:35:15 +0200 Subject: [PATCH 04/74] timing --- scripts/test_modma_timing.py | 117 +++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 scripts/test_modma_timing.py diff --git a/scripts/test_modma_timing.py b/scripts/test_modma_timing.py new file mode 100644 index 0000000..c0ffe7b --- /dev/null +++ b/scripts/test_modma_timing.py @@ -0,0 +1,117 @@ +import sys +import inspect +import warnings + +from time import perf_counter +from pprint import pprint +from dataclasses import dataclass + + +import numpy as np +from modcma import ModularCMAES +import modcma.c_maes as modcma +import cma as pycma +import ioh +from fcmaes import optimizer, retry + +np.random.seed(12) + +def timeit(f): + def inner(*args, **kwargs): + start = perf_counter() + result = f(*args, **kwargs) + stop = perf_counter() + elapsed = stop - start + return elapsed + return inner + + +# @timeit +# def run_modcmapy(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): +# cma = ModularCMAES(f, dim, budget=n_evaluations, x0=x0) +# cma.run() +# assert f.state.evaluations >= n_evaluations + + +@timeit +def run_fcmaes(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): + + bounds = np.array([f.bounds.lb, f.bounds.ub]) + res = optimizer.cmaescpp.minimize( + f, x0=x0, max_evaluations=n_evaluations, + stop_hist=0, accuracy=1e-10, stop_fitness=-700, + popsize=4 + ) + + + # ret = retry.minimize(f, bounds.T, optimizer=optimizer.Cma_cpp(n_evaluations)) + assert f.state.evaluations >= n_evaluations + + +@timeit +def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): + modules = modcma.parameters.Modules() + modules.sample_transformation = modcma.options.SCALED_UNIFORM + modules.matrix_adaptation = modcma.options.COVARIANCE + settings = modcma.Settings(dim, budget=n_evaluations, x0=x0, modules=modules, verbose=True) + + cma = modcma.ModularCMAES(settings) + + + maxp = 1/(10 * dim * (cma.p.weights.c1 +cma.p.weights.cmu)) + # print(dim, max(1, maxp), maxp) + # breakpoint() + + cma.run(f) + print(cma.p.stats.t, cma.p.stats.n_updates) + assert f.state.evaluations >= n_evaluations + return cma + + +@timeit +def run_pycma(f: ioh.ProblemType, dim: int, n_evaluations: int, x0: np.ndarray): + options = pycma.CMAOptions() + 
options['CMA_active'] = False + # options['maxfevals'] = n_evaluations + options["verbose"] = -1 + options["CMA_diagonal"] = False + # pprint(options) + + cma = pycma.CMAEvolutionStrategy(x0, 2.0, options=options) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + while f.state.evaluations < n_evaluations: + X, y = cma.ask_and_eval(f) + cma.tell(X, y) + # cma.disp() + assert f.state.evaluations >= n_evaluations + + +if __name__ == "__main__": + n_iters = 1 + n_evals = 1_000 + fid = 12 + dimensions = [100] + names, functions = zip( + *[ + (name, obj) + for name, obj in inspect.getmembers(sys.modules[__name__]) + if name.startswith("run") + ] + ) + data = {name: dict.fromkeys(dimensions) for name in names} + + for d in dimensions: + x0 = np.random.uniform(size=d) + for name, function in zip(names, functions): + data[name][d] = np.array( + [ + function(ioh.get_problem(fid, 1, d), d, n_evals * d, x0) + for _ in range(n_iters) + ] + ) + + print(f"fid: {fid} ({d}D) budget: {d * n_evals}") + for name in names: + print(name, data[name][d].mean(), data[name][d].std()) From 242285fca0b6f56cca139410c0bc3e1b12ae13c6 Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Thu, 29 May 2025 12:01:00 +0200 Subject: [PATCH 05/74] timing --- include/sampling.hpp | 2 +- scripts/{test_modma_timing.py => timing/test_timing.py} | 0 setup.py | 3 ++- src/restart_strategy.cpp | 4 ++-- 4 files changed, 5 insertions(+), 4 deletions(-) rename scripts/{test_modma_timing.py => timing/test_timing.py} (100%) diff --git a/include/sampling.hpp b/include/sampling.hpp index 4db1e2d..a3e2444 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -33,7 +33,7 @@ namespace sampling /** * @brief Testing sampler, simple incrementing generator. */ - struct Tester : Sampler + struct Tester : Sampler { Tester(const size_t d) : Sampler(d) {} diff --git a/scripts/test_modma_timing.py b/scripts/timing/test_timing.py similarity index 100% rename from scripts/test_modma_timing.py rename to scripts/timing/test_timing.py diff --git a/setup.py b/setup.py index 6a931f6..c62b0b5 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ if platform.system() in ("Linux", "Darwin"): os.environ["CC"] = "g++" os.environ["CXX"] = "g++" - flags = ["-O3", "-fno-math-errno", ] #"-fopenmp" + flags = ["-O3", "-fno-math-errno", "-msse2", "-mavx", "-mfma", "-mtune=native"] + if platform.system() == "Darwin": flags.append("-mmacosx-version-min=10.15") else: diff --git a/src/restart_strategy.cpp b/src/restart_strategy.cpp index 9f6c1b8..2f762d2 100644 --- a/src/restart_strategy.cpp +++ b/src/restart_strategy.cpp @@ -8,7 +8,7 @@ namespace restart { Float Strategy::update(parameters::Parameters &p) - { + { return p.settings.sigma0; } @@ -43,7 +43,7 @@ namespace restart lambda_large *= 2; } else - { + { budget_small -= last_used_budget; } From 9809c405977e7d054dd6170bd3b645cf05de402b Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 29 May 2025 18:51:15 +0200 Subject: [PATCH 06/74] cholesky decomp --- include/common.hpp | 3 ++ include/matrix_adaptation.hpp | 23 +++++---- include/restart_criteria.hpp | 2 +- src/common.cpp | 35 ++++++++++++- src/interface.cpp | 2 + src/main.cpp | 94 ++++++++++++++++++++++------------- src/matrix_adaptation.cpp | 58 ++++++++++++++------- 7 files changed, 154 insertions(+), 63 deletions(-) diff --git a/include/common.hpp b/include/common.hpp index 1aa6e5e..f6840e7 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -39,6 +39,7 @@ namespace constants extern size_t cache_min_samples; extern bool 
cache_samples; extern bool clip_sigma; + extern bool calc_eigv; } /** @@ -338,4 +339,6 @@ namespace functions Float sphere(const Vector &x); Float rastrigin(const Vector &x); Float ellipse(const Vector& x); + Float rosenbrock(const Vector& x); + Matrix random_rotation_matrix(int n, int seed); } diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 28314bc..b5731c8 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -10,7 +10,7 @@ namespace matrix_adaptation { struct Adaptation { - Vector m, m_old, dm, ps; + Vector m, m_old, dm, ps, dz; Float dd; Float expected_length_z; @@ -21,7 +21,11 @@ namespace matrix_adaptation { } - virtual void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + const std::shared_ptr& mutation, + const parameters::Stats& stats, size_t mu, size_t lambda); + + virtual void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) = 0; @@ -50,7 +54,7 @@ namespace matrix_adaptation return true; } - void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -65,16 +69,17 @@ namespace matrix_adaptation { Vector pc, d; Matrix B, C; - Matrix inv_root_C; - + Matrix A; + Matrix inv_root_C; bool hs = true; - + CovarianceAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), pc(Vector::Zero(dim)), d(Vector::Ones(dim)), B(Matrix::Identity(dim, dim)), C(Matrix::Identity(dim, dim)), + A(Matrix::Identity(dim, dim)), inv_root_C(Matrix::Identity(dim, dim)) { } @@ -84,7 +89,7 @@ namespace matrix_adaptation virtual bool perform_eigendecomposition(const parameters::Settings& settings); - void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -112,7 +117,7 @@ namespace matrix_adaptation using CovarianceAdaptation::CovarianceAdaptation; - void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -133,7 +138,7 @@ namespace matrix_adaptation { } - void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) override; diff --git a/include/restart_criteria.hpp b/include/restart_criteria.hpp index 6c1f1fb..e77562d 100644 --- a/include/restart_criteria.hpp +++ b/include/restart_criteria.hpp @@ -16,7 +16,7 @@ namespace restart std::string name; size_t last_restart; - Criterion(const std::string &name) : met(false), name(name) {} + Criterion(const std::string &name) : met(false), name(name), last_restart(0) {} virtual ~Criterion() = default; diff --git a/src/common.cpp b/src/common.cpp index 09478e8..f97b69b 100644 
--- a/src/common.cpp +++ b/src/common.cpp @@ -14,7 +14,8 @@ namespace constants size_t cache_max_doubles = 2'000'000; size_t cache_min_samples = 128; bool cache_samples = false; - bool clip_sigma = false; + bool clip_sigma = false; + bool calc_eigv = true; } namespace utils @@ -168,4 +169,36 @@ namespace functions res += pow(1.0e6, static_cast(i) / (static_cast(x.size()) - 1)) * x(i) * x(i); return res; } + + Float rosenbrock(const Vector& x) { + Float sum = 0.0; + for (auto i = 0; i < x.size() - 1; ++i) { + Float xi = x[i]; + Float xi1 = x[i + 1]; + Float term1 = 100.0 * std::pow(xi1 - xi * xi, 2); + Float term2 = std::pow(1.0 - xi, 2); + sum += term1 + term2; + } + return sum; + } + + Matrix random_rotation_matrix(int n, int seed) { + std::mt19937 gen(seed); + std::normal_distribution<> d(0, 1); + + Matrix A(n, n); + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + A(i, j) = d(gen); + + Eigen::HouseholderQR qr(A); + Matrix Q = qr.householderQ(); + + if (Q.determinant() < 0) { + Q.col(0) *= -1; + } + + return Q; + } + } diff --git a/src/interface.cpp b/src/interface.cpp index 19bb299..e2a7b4e 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -328,6 +328,7 @@ void define_matrix_adaptation(py::module &main) .def_readwrite("m", &Adaptation::m) .def_readwrite("m_old", &Adaptation::m_old) .def_readwrite("dm", &Adaptation::dm) + .def_readwrite("dz", &Adaptation::dz) .def_readwrite("ps", &Adaptation::ps) .def_readwrite("dd", &Adaptation::dd) .def_readwrite("expected_length_z", &Adaptation::expected_length_z) @@ -369,6 +370,7 @@ void define_matrix_adaptation(py::module &main) .def_readwrite("d", &CovarianceAdaptation::d) .def_readwrite("B", &CovarianceAdaptation::B) .def_readwrite("C", &CovarianceAdaptation::C) + .def_readwrite("A", &CovarianceAdaptation::A) .def_readwrite("inv_root_C", &CovarianceAdaptation::inv_root_C) .def_readwrite("hs", &CovarianceAdaptation::hs) .def("adapt_covariance_matrix", &CovarianceAdaptation::adapt_covariance_matrix, diff --git a/src/main.cpp b/src/main.cpp index 3d3fd4a..c5f1473 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,16 +8,21 @@ using std::chrono::duration; using std::chrono::milliseconds; - -struct Function +struct Ellipse { size_t evals = 0; + Matrix R; + + Ellipse(const int dim, const bool rotated = false) : + R{ rotated ? 
functions::random_rotation_matrix(dim, 1): Matrix::Identity(dim, dim) } + { + } Float operator()(const Vector& x) { evals++; - const auto x_shift = (x.array() - 1.).matrix(); - return functions::ellipse(x_shift); + const auto x_shift = R * (x.array() - 1.).matrix(); + return functions::rosenbrock(x_shift); } }; @@ -44,34 +49,53 @@ struct Timer }; -// int main() -// { -// rng::set_seed(42); -// const size_t dim = 100; -// const size_t budget = dim * 1000; - -// parameters::Modules m; -// //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; -// m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; -// m.bound_correction = parameters::CorrectionMethod::NONE; - -// parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), -// std::nullopt, budget, 2.0); -// auto p = std::make_shared(settings); - -// auto cma = ModularCMAES(p); - -// Timer t; -// FunctionType f = Function(); -// while (cma.step(f)) -// { -// //std::cout << cma.p->stats << std::endl; -// //std::cout << cma.p->mutation->sigma << std::endl; -// //auto sr = std::dynamic_pointer_cast(cma.p->mutation); -// //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; -// } -// std::cout << cma.p->stats.evaluations << std::endl; -// std::cout << cma.p->stats.t << std::endl; -// std::cout << cma.p->stats.n_updates << std::endl; -// std::cout << cma.p->stats << std::endl; -// } +int main() +{ + rng::set_seed(42); + const size_t dim = 100; + const size_t budget = dim * 10000; + const bool rotated = true; + + parameters::Modules m; + //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; + //m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; + m.bound_correction = parameters::CorrectionMethod::NONE; + + parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), + std::nullopt, budget, 2.0); + auto p = std::make_shared(settings); + auto cma = ModularCMAES(p); + + Timer t; + FunctionType f = Ellipse(dim, rotated); + while (cma.step(f)) + { + //std::cout << cma.p->stats << std::endl; + //std::cout << cma.p->mutation->sigma << std::endl; + //auto sr = std::dynamic_pointer_cast(cma.p->mutation); + //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; + + //if (cma.p->stats.current_best.y < 1e-8) + // break; + + // No rotation + // e: Stats t=549 e=5490 + // no-e: Stats t=594 e=5940 + // Rotation + // e: Stats t = 559 e = 5590 + // no-e: Stats t=549 e=5490 + + // Rosen + // no rotation + // e: Stats t = 617 e = 6170 + // noe: Stats t=625 e=6250 + // rotation: + // e: Stats t=618 e=6180 + // no-e Stats t=568 e=5680 + // + } + std::cout << cma.p->stats.evaluations << std::endl; + std::cout << cma.p->stats.t << std::endl; + std::cout << cma.p->stats.n_updates << std::endl; + std::cout << cma.p->stats << std::endl; +} diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 86fec0e..ae2e154 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -9,12 +9,23 @@ namespace matrix_adaptation return (xi - m) / sigma; } - void CovarianceAdaptation::adapt_evolution_paths(const Population& pop, const Weights& w, + void Adaptation::adapt_evolution_paths(const Population& pop, const Weights& w, const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) { dm = (m - m_old) / mutation->sigma; - ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * inv_root_C * dm); + dz = pop.Z.leftCols(mu) * 
w.positive.head(mu); + adapt_evolution_paths_inner(pop, w, mutation, stats, mu, lambda); + } + + + void CovarianceAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + const std::shared_ptr& mutation, + const Stats& stats, const size_t mu, const size_t lambda) + { + const auto& expr = constants::calc_eigv ? inv_root_C * dm : dz; + + ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * expr); const Float actual_ps_length = ps.norm() / sqrt( 1.0 - pow(1.0 - mutation->cs, 2.0 * (stats.evaluations / lambda))); @@ -48,6 +59,23 @@ namespace matrix_adaptation bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { + if (!constants::calc_eigv) + { + const Eigen::LLT chol(C); + if (chol.info() != Eigen::Success) + { + if (settings.verbose) + { + std::cout << "Cholesky solver failed, we need to restart reason:" + << chol.info() << '\n'; + } + return false; + } + + A = chol.matrixL(); + return true; + } + const Eigen::SelfAdjointEigenSolver eigen_solver(C); if (eigen_solver.info() != Eigen::Success) { @@ -71,8 +99,9 @@ namespace matrix_adaptation } - d = d.cwiseSqrt(); - inv_root_C = B * d.cwiseInverse().asDiagonal() * B.transpose(); + d.noalias() = d.cwiseSqrt(); + inv_root_C.noalias() = eigen_solver.operatorInverseSqrt(); + A.noalias() = B * d.asDiagonal(); return true; } @@ -95,6 +124,7 @@ namespace matrix_adaptation { B = Matrix::Identity(settings.dim, settings.dim); C = Matrix::Identity(settings.dim, settings.dim); + A = Matrix::Identity(settings.dim, settings.dim); inv_root_C = Matrix::Identity(settings.dim, settings.dim); d.setOnes(); m = settings.x0.value_or(Vector::Zero(settings.dim)); @@ -106,11 +136,14 @@ namespace matrix_adaptation Vector CovarianceAdaptation::compute_y(const Vector& zi) { - return B * d.cwiseProduct(zi); + return A * zi; } Vector CovarianceAdaptation::invert_y(const Vector& yi) { + if (!constants::calc_eigv) + return A.triangularView().solve(yi); + return (B.transpose() * yi).cwiseQuotient(d); } @@ -121,11 +154,10 @@ namespace matrix_adaptation } - void OnePlusOneAdaptation::adapt_evolution_paths(const Population& pop, const parameters::Weights& w, + void OnePlusOneAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) { - dm = (m - m_old) / mutation->sigma; if (!stats.has_improved) return; @@ -147,14 +179,10 @@ namespace matrix_adaptation - void MatrixAdaptation::adapt_evolution_paths(const Population& pop, const Weights& w, + void MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) { - dm = (m - m_old) / mutation->sigma; - - const auto dz = (pop.Z.leftCols(mu).array().rowwise() * w.positive.array().transpose()).rowwise().sum(). - matrix(); ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * dz); } @@ -206,14 +234,10 @@ namespace matrix_adaptation } - void None::adapt_evolution_paths(const Population& pop, const Weights& w, + void None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) { - dm = (m - m_old) / mutation->sigma; - - const auto dz = (pop.Z.leftCols(mu).array().rowwise() * w.positive.array().transpose()).rowwise().sum(). 
- matrix(); ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * dz); } From e4a7c450f4f61f3e4aaa81f439db33f9da39c108 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Fri, 30 May 2025 14:19:00 +0200 Subject: [PATCH 07/74] update eigen add fcmaea --- CMakeLists.txt | 15 +- external/Eigen/version.txt | 1 + .../cmake/ComputeCppCompilerChecks.cmake | 50 + .../eigen-3.4.0/cmake/ComputeCppIRMap.cmake | 18 + .../eigen-3.4.0/cmake/Eigen3Config.cmake.in | 23 + .../cmake/Eigen3ConfigLegacy.cmake.in | 30 + .../cmake/EigenConfigureTesting.cmake | 58 + .../cmake/EigenDetermineOSVersion.cmake | 46 + .../cmake/EigenDetermineVSServicePack.cmake | 41 + .../cmake/EigenSmokeTestList.cmake | 131 ++ external/eigen-3.4.0/cmake/EigenTesting.cmake | 782 +++++++ .../eigen-3.4.0/cmake/EigenUninstall.cmake | 40 + external/eigen-3.4.0/cmake/FindAdolc.cmake | 20 + external/eigen-3.4.0/cmake/FindBLAS.cmake | 1407 ++++++++++++ external/eigen-3.4.0/cmake/FindBLASEXT.cmake | 384 ++++ external/eigen-3.4.0/cmake/FindCHOLMOD.cmake | 89 + .../eigen-3.4.0/cmake/FindComputeCpp.cmake | 455 ++++ external/eigen-3.4.0/cmake/FindEigen2.cmake | 80 + external/eigen-3.4.0/cmake/FindEigen3.cmake | 107 + external/eigen-3.4.0/cmake/FindFFTW.cmake | 120 + external/eigen-3.4.0/cmake/FindGLEW.cmake | 105 + external/eigen-3.4.0/cmake/FindGMP.cmake | 21 + external/eigen-3.4.0/cmake/FindGSL.cmake | 170 ++ .../eigen-3.4.0/cmake/FindGoogleHash.cmake | 23 + external/eigen-3.4.0/cmake/FindHWLOC.cmake | 332 +++ external/eigen-3.4.0/cmake/FindKLU.cmake | 48 + external/eigen-3.4.0/cmake/FindLAPACK.cmake | 274 +++ external/eigen-3.4.0/cmake/FindMPFR.cmake | 83 + external/eigen-3.4.0/cmake/FindMPREAL.cmake | 103 + external/eigen-3.4.0/cmake/FindMetis.cmake | 265 +++ external/eigen-3.4.0/cmake/FindPASTIX.cmake | 704 ++++++ external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake | 422 ++++ external/eigen-3.4.0/cmake/FindSCOTCH.cmake | 370 ++++ external/eigen-3.4.0/cmake/FindSPQR.cmake | 41 + .../cmake/FindStandardMathLibrary.cmake | 70 + external/eigen-3.4.0/cmake/FindSuperLU.cmake | 97 + external/eigen-3.4.0/cmake/FindTriSYCL.cmake | 173 ++ external/eigen-3.4.0/cmake/FindUMFPACK.cmake | 53 + external/eigen-3.4.0/cmake/RegexUtils.cmake | 19 + external/eigen-3.4.0/cmake/UseEigen3.cmake | 6 + include/acmaes.hpp | 116 + include/common.hpp | 2 +- include/evaluator.h | 508 +++++ include/pcg_extras.hpp | 667 ++++++ include/pcg_random.hpp | 1958 +++++++++++++++++ include/pcg_uint128.hpp | 1010 +++++++++ src/acmaes.cpp | 670 ++++++ src/main.cpp | 97 +- src/matrix_adaptation.cpp | 2 +- 49 files changed, 12284 insertions(+), 22 deletions(-) create mode 100644 external/Eigen/version.txt create mode 100644 external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake create mode 100644 external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake create mode 100644 external/eigen-3.4.0/cmake/Eigen3Config.cmake.in create mode 100644 external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in create mode 100644 external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake create mode 100644 external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake create mode 100644 external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake create mode 100644 external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake create mode 100644 external/eigen-3.4.0/cmake/EigenTesting.cmake create mode 100644 external/eigen-3.4.0/cmake/EigenUninstall.cmake create mode 100644 external/eigen-3.4.0/cmake/FindAdolc.cmake create mode 100644 external/eigen-3.4.0/cmake/FindBLAS.cmake create mode 100644 
external/eigen-3.4.0/cmake/FindBLASEXT.cmake create mode 100644 external/eigen-3.4.0/cmake/FindCHOLMOD.cmake create mode 100644 external/eigen-3.4.0/cmake/FindComputeCpp.cmake create mode 100644 external/eigen-3.4.0/cmake/FindEigen2.cmake create mode 100644 external/eigen-3.4.0/cmake/FindEigen3.cmake create mode 100644 external/eigen-3.4.0/cmake/FindFFTW.cmake create mode 100644 external/eigen-3.4.0/cmake/FindGLEW.cmake create mode 100644 external/eigen-3.4.0/cmake/FindGMP.cmake create mode 100644 external/eigen-3.4.0/cmake/FindGSL.cmake create mode 100644 external/eigen-3.4.0/cmake/FindGoogleHash.cmake create mode 100644 external/eigen-3.4.0/cmake/FindHWLOC.cmake create mode 100644 external/eigen-3.4.0/cmake/FindKLU.cmake create mode 100644 external/eigen-3.4.0/cmake/FindLAPACK.cmake create mode 100644 external/eigen-3.4.0/cmake/FindMPFR.cmake create mode 100644 external/eigen-3.4.0/cmake/FindMPREAL.cmake create mode 100644 external/eigen-3.4.0/cmake/FindMetis.cmake create mode 100644 external/eigen-3.4.0/cmake/FindPASTIX.cmake create mode 100644 external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake create mode 100644 external/eigen-3.4.0/cmake/FindSCOTCH.cmake create mode 100644 external/eigen-3.4.0/cmake/FindSPQR.cmake create mode 100644 external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake create mode 100644 external/eigen-3.4.0/cmake/FindSuperLU.cmake create mode 100644 external/eigen-3.4.0/cmake/FindTriSYCL.cmake create mode 100644 external/eigen-3.4.0/cmake/FindUMFPACK.cmake create mode 100644 external/eigen-3.4.0/cmake/RegexUtils.cmake create mode 100644 external/eigen-3.4.0/cmake/UseEigen3.cmake create mode 100644 include/acmaes.hpp create mode 100644 include/evaluator.h create mode 100644 include/pcg_extras.hpp create mode 100644 include/pcg_random.hpp create mode 100644 include/pcg_uint128.hpp create mode 100644 src/acmaes.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fd8470d..a383403 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,22 +5,19 @@ project(modcma) # Set the version of C/C++ (here C++17) set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/src/*.cpp) list(FILTER SRC_FILES EXCLUDE REGEX ".*interface.cpp$") -add_executable(main ${SRC_FILES}) - -#add_subdirectory(${PROJECT_SOURCE_DIR}/../IOHexperimenter ${CMAKE_CURRENT_BINARY_DIR}/ioh) +add_executable(main ${SRC_FILES}) target_include_directories(main PUBLIC - ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/external -# ${PROJECT_SOURCE_DIR}/../IOHexperimenter/include + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/external ) -#target_link_libraries(main PUBLIC ioh) - if (MSVC) target_compile_options(main PRIVATE /bigobj) -endif() +endif() + \ No newline at end of file diff --git a/external/Eigen/version.txt b/external/Eigen/version.txt new file mode 100644 index 0000000..fbcbf73 --- /dev/null +++ b/external/Eigen/version.txt @@ -0,0 +1 @@ +3.4.0 \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake b/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake new file mode 100644 index 0000000..1807485 --- /dev/null +++ b/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.4.3) + +if(CMAKE_COMPILER_IS_GNUCXX) + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + message(FATAL_ERROR "host compiler - gcc version must be > 4.8") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) + 
message(FATAL_ERROR "host compiler - clang version must be > 3.6") + endif() +endif() + +if(MSVC) + set(ComputeCpp_STL_CHECK_SRC __STL_check) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp + "#include \n" + "int main() { return 0; }\n") + execute_process( + COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -isystem ${ComputeCpp_INCLUDE_DIRS} + -o ${ComputeCpp_STL_CHECK_SRC}.sycl + -c ${ComputeCpp_STL_CHECK_SRC}.cpp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT + ERROR_QUIET + OUTPUT_QUIET) + if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) + # Try disabling compiler version checks + execute_process( + COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH + -isystem ${ComputeCpp_INCLUDE_DIRS} + -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl + -c ${ComputeCpp_STL_CHECK_SRC}.cpp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT + ERROR_QUIET + OUTPUT_QUIET) + if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) + message(STATUS "Device compiler cannot consume hosted STL headers. Using any parts of the STL will likely result in device compiler errors.") + else() + message(STATUS "Device compiler does not meet certain STL version requirements. Disabling version checks and hoping for the best.") + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH) + endif() + endif() + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp + ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl) +endif(MSVC) diff --git a/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake b/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake new file mode 100644 index 0000000..942d91d --- /dev/null +++ b/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.4.3) + +# These should match the types of IR output by compute++ +set(IR_MAP_spir bc) +set(IR_MAP_spir64 bc) +set(IR_MAP_spir32 bc) +set(IR_MAP_spirv spv) +set(IR_MAP_spirv64 spv) +set(IR_MAP_spirv32 spv) +set(IR_MAP_aorta-x86_64 o) +set(IR_MAP_aorta-aarch64 o) +set(IR_MAP_aorta-rcar-cve o) +set(IR_MAP_custom-spir64 bc) +set(IR_MAP_custom-spir32 bc) +set(IR_MAP_custom-spirv64 spv) +set(IR_MAP_custom-spirv32 spv) +set(IR_MAP_ptx64 s) +set(IR_MAP_amdgcn s) diff --git a/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in b/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in new file mode 100644 index 0000000..0a1ac61 --- /dev/null +++ b/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in @@ -0,0 +1,23 @@ +# This file exports the Eigen3::Eigen CMake target which should be passed to the +# target_link_libraries command. + +@PACKAGE_INIT@ + +if (NOT TARGET eigen) + include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake") +endif () + +# Legacy variables, do *not* use. May be removed in the future. 
+ +set (EIGEN3_FOUND 1) +set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake") + +set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@") +set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@") +set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@") +set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@") + +set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@") +set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@") +set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@") +set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@") diff --git a/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in b/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in new file mode 100644 index 0000000..62d7224 --- /dev/null +++ b/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in @@ -0,0 +1,30 @@ +# -*- cmake -*- +# +# Eigen3Config.cmake(.in) + +# Use the following variables to compile and link against Eigen: +# EIGEN3_FOUND - True if Eigen was found on your system +# EIGEN3_USE_FILE - The file making Eigen usable +# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen +# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found +# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies +# EIGEN3_ROOT_DIR - The base directory of Eigen +# EIGEN3_VERSION_STRING - A human-readable string containing the version +# EIGEN3_VERSION_MAJOR - The major version of Eigen +# EIGEN3_VERSION_MINOR - The minor version of Eigen +# EIGEN3_VERSION_PATCH - The patch version of Eigen + +@PACKAGE_INIT@ + +set ( EIGEN3_FOUND 1 ) +set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" ) + +set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" ) +set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" ) +set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" ) +set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" ) + +set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" ) +set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" ) +set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" ) +set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" ) diff --git a/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake b/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake new file mode 100644 index 0000000..9cb3bb2 --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake @@ -0,0 +1,58 @@ +include(EigenTesting) +include(CheckCXXSourceCompiles) + +# configure the "site" and "buildname" +ei_set_sitename() + +# retrieve and store the build string +ei_set_build_string() + +add_custom_target(buildtests) +add_custom_target(check COMMAND "ctest") +add_dependencies(check buildtests) + +# check whether /bin/bash exists (disabled as not used anymore) +# find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH) + +# This call activates testing and generates the DartConfiguration.tcl +include(CTest) + +set(EIGEN_TEST_BUILD_FLAGS "" CACHE STRING "Options passed to the build command of unit tests") +set(EIGEN_DASHBOARD_BUILD_TARGET "buildtests" CACHE STRING "Target to be built in dashboard mode, default is buildtests") +set(EIGEN_CTEST_ERROR_EXCEPTION "" CACHE STRING "Regular expression for build error messages to be filtered out") + +# Overwrite default DartConfiguration.tcl such that ctest can build our unit tests. +# Recall that our unit tests are not in the "all" target, so we have to explicitly ask ctest to build our custom 'buildtests' target. +# At this stage, we can also add custom flags to the build tool through the user defined EIGEN_TEST_BUILD_FLAGS variable. 
+file(READ "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" EIGEN_DART_CONFIG_FILE) +# try to grab the default flags +string(REGEX MATCH "MakeCommand:.*-- (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE}) +if(NOT CMAKE_MATCH_1) +string(REGEX MATCH "MakeCommand:.*[^c]make (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE}) +endif() +string(REGEX REPLACE "MakeCommand:.*DefaultCTestConfigurationType" "MakeCommand: ${CMAKE_COMMAND} --build . --target ${EIGEN_DASHBOARD_BUILD_TARGET} --config \"\${CTEST_CONFIGURATION_TYPE}\" -- ${CMAKE_MATCH_1} ${EIGEN_TEST_BUILD_FLAGS}\nDefaultCTestConfigurationType" + EIGEN_DART_CONFIG_FILE2 ${EIGEN_DART_CONFIG_FILE}) +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" ${EIGEN_DART_CONFIG_FILE2}) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake) + +# some documentation of this function would be nice +ei_init_testing() + +# configure Eigen related testing options +option(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions using exceptions" OFF) +option(EIGEN_DEBUG_ASSERTS "Enable advanced debugging of assertions" OFF) + +if(CMAKE_COMPILER_IS_GNUCXX) + option(EIGEN_COVERAGE_TESTING "Enable/disable gcov" OFF) + if(EIGEN_COVERAGE_TESTING) + set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage") + set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}") + endif() + +elseif(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS") +endif() + + diff --git a/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake b/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake new file mode 100644 index 0000000..9246fa6 --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake @@ -0,0 +1,46 @@ +# The utility function DetermineOSVersion aims at providing an +# improved version of the CMake variable ${CMAKE_SYSTEM} on Windows +# machines. 
+# +# Usage: +# include(EigenDetermineOSVersion) +# DetermineOSVersion(OS_VERSION) +# message("OS: ${OS_VERSION}") + +# - A little helper variable which should not be directly called +function(DetermineShortWindowsName WIN_VERSION win_num_version) + if (${win_num_version} VERSION_EQUAL "6.1") + set(_version "win7") + elseif(${win_num_version} VERSION_EQUAL "6.0") + set(_version "winVista") + elseif(${win_num_version} VERSION_EQUAL "5.2") + set(_version "winXpProf") + elseif(${win_num_version} VERSION_EQUAL "5.1") + set(_version "winXp") + elseif(${win_num_version} VERSION_EQUAL "5.0") + set(_version "win2000Prof") + else() + set(_version "unknownWin") + endif() + set(${WIN_VERSION} ${_version} PARENT_SCOPE) +endfunction() + +function(DetermineOSVersion OS_VERSION) + if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows) + file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL) + exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output) + + string(REGEX MATCHALL "[0-9]+" + ver_list "${ver_output}") + list(GET ver_list 0 _major) + list(GET ver_list 1 _minor) + + set(win_num_version ${_major}.${_minor}) + DetermineShortWindowsName(win_version "${win_num_version}") + if(win_version) + set(${OS_VERSION} ${win_version} PARENT_SCOPE) + endif() + else() + set(${OS_VERSION} ${CMAKE_SYSTEM} PARENT_SCOPE) + endif() +endfunction() diff --git a/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake b/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake new file mode 100644 index 0000000..fed7819 --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake @@ -0,0 +1,41 @@ +include(CMakeDetermineVSServicePack) + +# The code is almost identical to the CMake version. The only difference is that we remove +# _DetermineVSServicePack_FastCheckVersionWithCompiler which lead to errors on some systems. +function(EigenDetermineVSServicePack _pack) + if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack}) + if(NOT DETERMINED_VS_SERVICE_PACK) + _DetermineVSServicePack_CheckVersionWithTryCompile(DETERMINED_VS_SERVICE_PACK _cl_version) + if(NOT DETERMINED_VS_SERVICE_PACK) + _DetermineVSServicePack_CheckVersionWithTryRun(DETERMINED_VS_SERVICE_PACK _cl_version) + endif() + endif() + + if(DETERMINED_VS_SERVICE_PACK) + if(_cl_version) + # Call helper function to determine VS version + _DetermineVSServicePackFromCompiler(_sp "${_cl_version}") + + # temporary fix, until CMake catches up + if (NOT _sp) + if(${_cl_version} VERSION_EQUAL "17.00.50727.1") + set(_sp "vc110") + elseif(${_cl_version} VERSION_EQUAL "17.00.51106.1") + set(_sp "vc110sp1") + elseif(${_cl_version} VERSION_EQUAL "17.00.60315.1") + set(_sp "vc110sp2") + elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1") + set(_sp "vc110sp3") + else() + set(_sp ${CMAKE_CXX_COMPILER_VERSION}) + endif() + endif() + + if(_sp) + set(${_pack} ${_sp} CACHE INTERNAL + "The Visual Studio Release with Service Pack") + endif() + endif() + endif() + endif() +endfunction() diff --git a/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake b/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake new file mode 100644 index 0000000..6f0f724 --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake @@ -0,0 +1,131 @@ +# List of tests that will be build and run during Eigen's smoke testing. If one +# of these tests doesn't exists or cannot be build with the current configuration +# it will just be skipped. 
+set(ei_smoke_test_list + adjoint_1 + alignedvector3 + array_cwise_7 + array_cwise_8 + array_for_matrix_1 + array_of_string + array_replicate_1 + array_reverse_1 + autodiff_1 + autodiff_scalar_1 + bandmatrix + bdcsvd_9 + bessel_functions_1 + bfloat16_float + blasutil_1 + block_5 + BVH + cholesky_1 + cholmod_support_23 + cholmod_support_24 + conservative_resize_1 + constructor_1 + corners_1 + ctorleakmiscmatrices_4 + dense_storage + determinant_1 + diagonal_1 + diagonal_2 + diagonalmatrices_1 + dynalloc + eigensolver_complex_1 + eigensolver_selfadjoint_8 + EulerAngles_1 + exceptions + fastmath + first_aligned + geo_alignedbox_2 + geo_eulerangles_1 + geo_homogeneous_1 + geo_hyperplane_1 + geo_orthomethods_1 + geo_parametrizedline_1 + geo_transformations_7 + half_float + hessenberg_1 + hessenberg_6qr_10 + householder_8 + indexed_view_1 + inplace_decomposition_1 + integer_types_1 + inverse_1 + is_same_dense + jacobi_1 + jacobisvd_1 + kronecker_product + linearstructure_1 + mapped_matrix_1 + mapstaticmethods_1 + mapstride_1 + matrix_square_root_1 + meta + minres_2 + miscmatrices_1 + mixingtypes_7 + nestbyvalue + nesting_ops_1 + nomalloc_1 + nullary_1 + num_dimensions + NumericalDiff + numext + packetmath + permutationmatrices_1 + polynomialsolver_1 + prec_inverse_4x4_1 + product_extra_5 + product_selfadjoint_1 + product_small_7 + product_symm_1 + product_syrk_1 + product_trmm_1 + product_trmv_1 + product_trsolve_5 + qr_1 + qr_colpivoting_7 + qr_fullpivoting_4 + rand + real_qz_1 + redux_1 + ref_1 + resize + rvalue_types_1 + schur_complex_1 + schur_real_1 + selfadjoint_1 + sizeof + sizeoverflow + smallvectors + sparse_basic_3 + sparse_block_1 + sparse_extra_4 + sparse_permutations_2 + sparse_product_4 + sparse_ref_1 + sparse_solvers_1 + sparse_vector_1 + special_functions_1 + special_numbers_1 + special_packetmath_1 + spqr_support_2 + stable_norm_1 + stddeque_1 + stddeque_overload_1 + stdlist_1 + stdlist_overload_1 + stdvector_1 + stdvector_overload_1 + stl_iterators_1 + swap_1 + symbolic_index_1 + triangular_1 + type_aliaslu_9 + umeyama_3 + unalignedassert + unalignedcount + vectorwiseop_1 + visitor_1) \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/EigenTesting.cmake b/external/eigen-3.4.0/cmake/EigenTesting.cmake new file mode 100644 index 0000000..eb8457d --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenTesting.cmake @@ -0,0 +1,782 @@ + +macro(ei_add_property prop value) + get_property(previous GLOBAL PROPERTY ${prop}) + if ((NOT previous) OR (previous STREQUAL "")) + set_property(GLOBAL PROPERTY ${prop} "${value}") + else() + set_property(GLOBAL PROPERTY ${prop} "${previous} ${value}") + endif() +endmacro() + +#internal. See documentation of ei_add_test for details. 
+macro(ei_add_test_internal testname testname_with_suffix) + set(targetname ${testname_with_suffix}) + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + # Add the current target to the list of subtest targets + get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST) + set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}${targetname}\n") + set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) + if(EIGEN_TEST_HIP) + hip_reset_flags() + hip_add_executable(${targetname} ${filename} HIPCC_OPTIONS "-DEIGEN_USE_HIP ${ARGV2}") + elseif(EIGEN_TEST_CUDA_CLANG) + set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) + + if(CUDA_64_BIT_DEVICE_CODE AND (EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64")) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + else() + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") + endif() + + if (${ARGC} GREATER 2) + add_executable(${targetname} ${filename}) + else() + add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + endif() + set(CUDA_CLANG_LINK_LIBRARIES "cudart_static" "cuda" "dl" "pthread") + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(CUDA_CLANG_LINK_LIBRARIES ${CUDA_CLANG_LINK_LIBRARIES} "rt") + endif() + target_link_libraries(${targetname} ${CUDA_CLANG_LINK_LIBRARIES}) + else() + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() + endif() + else() + add_executable(${targetname} ${filename}) + endif() + + if (targetname MATCHES "^eigen2_") + add_dependencies(eigen2_buildtests ${targetname}) + else() + add_dependencies(buildtests ${targetname}) + endif() + + if(EIGEN_NO_ASSERTION_CHECKING) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") + else() + if(EIGEN_DEBUG_ASSERTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") + endif() + endif() + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") + + if(MSVC) + ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") + endif() + + # let the user pass flags. + if(${ARGC} GREATER 2) + ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") + endif() + + if(EIGEN_TEST_CUSTOM_CXX_FLAGS) + ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + endif() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + if(EXTERNAL_LIBS) + target_link_libraries(${targetname} ${EXTERNAL_LIBS}) + endif() + if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) + target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) + endif() + + if(${ARGC} GREATER 3) + set(libs_to_link ${ARGV3}) + # it could be that some cmake module provides a bad library string " " (just spaces), + # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors). + # so we check for strings containing only spaces. + string(STRIP "${libs_to_link}" libs_to_link_stripped) + string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length) + if(${libs_to_link_stripped_length} GREATER 0) + # notice: no double quotes around ${libs_to_link} here. It may be a list. 
+ target_link_libraries(${targetname} ${libs_to_link}) + endif() + endif() + + add_test(${testname_with_suffix} "${targetname}") + + # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT + get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) + if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) + set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") + add_dependencies("Build${current_subproject}" ${targetname}) + set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") + endif() + if(EIGEN_SYCL) + # Force include of the SYCL file at the end to avoid errors. + set_property(TARGET ${targetname} PROPERTY COMPUTECPP_INCLUDE_AFTER 1) + # Set COMPILE_FLAGS to COMPILE_DEFINITIONS instead to avoid having to duplicate the flags + # to the device compiler. + get_target_property(target_compile_flags ${targetname} COMPILE_FLAGS) + separate_arguments(target_compile_flags) + foreach(flag ${target_compile_flags}) + if(${flag} MATCHES "^-D.*") + string(REPLACE "-D" "" definition_flag ${flag}) + set_property(TARGET ${targetname} APPEND PROPERTY COMPILE_DEFINITIONS ${definition_flag}) + list(REMOVE_ITEM target_compile_flags ${flag}) + endif() + endforeach() + set_property(TARGET ${targetname} PROPERTY COMPILE_FLAGS ${target_compile_flags}) + # Link against pthread and add sycl to target + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + target_link_libraries(${targetname} Threads::Threads) + add_sycl_to_target(TARGET ${targetname} SOURCES ${filename}) + endif(EIGEN_SYCL) +endmacro(ei_add_test_internal) +# Macro to add a test +# +# the unique mandatory parameter testname must correspond to a file +# .cpp which follows this pattern: +# +# #include "main.h" +# void test_() { ... } +# +# Depending on the contents of that file, this macro can have 2 behaviors, +# see below. +# +# The optional 2nd parameter is libraries to link to. +# +# A. Default behavior +# +# this macro adds an executable as well as a ctest test +# named too. +# +# On platforms with bash simply run: +# "ctest -V" or "ctest -V -R " +# On other platform use ctest as usual +# +# B. Multi-part behavior +# +# If the source file matches the regexp +# CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+ +# then it is interpreted as a multi-part test. The behavior then depends on the +# CMake option EIGEN_SPLIT_LARGE_TESTS, which is ON by default. +# +# If EIGEN_SPLIT_LARGE_TESTS is OFF, the behavior is the same as in A (the multi-part +# aspect is ignored). +# +# If EIGEN_SPLIT_LARGE_TESTS is ON, the test is split into multiple executables +# test__ +# where N runs from 1 to the greatest occurrence found in the source file. Each of these +# executables is built passing -DEIGEN_TEST_PART_N. This allows to split large tests +# into smaller executables. +# +# Moreover, targets are still generated, they +# have the effect of building all the parts of the test. +# +# Again, ctest -R allows to run all matching tests. 
+macro(ei_add_test testname) + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") + set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + file(READ "${filename}" test_source) + string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" + occurrences "${test_source}") + string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurrences}") + list(REMOVE_DUPLICATES suffixes) + set(explicit_suffixes "") + if( (NOT EIGEN_SPLIT_LARGE_TESTS) AND suffixes) + # Check whether we have EIGEN_TEST_PART_* statements, in which case we likely must enforce splitting. + # For instance, indexed_view activate a different c++ version for each part. + string(REGEX MATCHALL "EIGEN_TEST_PART_[0-9]+" occurrences "${test_source}") + string(REGEX REPLACE "EIGEN_TEST_PART_" "" explicit_suffixes "${occurrences}") + list(REMOVE_DUPLICATES explicit_suffixes) + endif() + if( (EIGEN_SPLIT_LARGE_TESTS AND suffixes) OR explicit_suffixes) + add_custom_target(${testname}) + foreach(suffix ${suffixes}) + ei_add_test_internal(${testname} ${testname}_${suffix} + "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") + add_dependencies(${testname} ${testname}_${suffix}) + endforeach() + else() + ei_add_test_internal(${testname} ${testname} "${ARGV1} -DEIGEN_TEST_PART_ALL=1" "${ARGV2}") + endif() +endmacro() + +# adds a failtest, i.e. a test that succeed if the program fails to compile +# note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON +# so here we're just running CMake commands immediately, we're not adding any targets. +macro(ei_add_failtest testname) + + set(test_target_ok ${testname}_ok) + set(test_target_ko ${testname}_ko) + + # Add executables + add_executable(${test_target_ok} ${testname}.cpp) + add_executable(${test_target_ko} ${testname}.cpp) + + # Remove them from the normal build process + set_target_properties(${test_target_ok} ${test_target_ko} PROPERTIES + EXCLUDE_FROM_ALL TRUE + EXCLUDE_FROM_DEFAULT_BUILD TRUE) + + # Configure the failing test + target_compile_definitions(${test_target_ko} PRIVATE EIGEN_SHOULD_FAIL_TO_BUILD) + + # Add the tests to ctest. + add_test(NAME ${test_target_ok} + COMMAND ${CMAKE_COMMAND} --build . --target ${test_target_ok} --config $ + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + add_test(NAME ${test_target_ko} + COMMAND ${CMAKE_COMMAND} --build . 
--target ${test_target_ko} --config $ + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + + # Expect the second test to fail + set_tests_properties(${test_target_ko} PROPERTIES WILL_FAIL TRUE) +endmacro() + +# print a summary of the different options +macro(ei_testing_print_summary) + message(STATUS "************************************************************") + message(STATUS "*** Eigen's unit tests configuration summary ***") + message(STATUS "************************************************************") + message(STATUS "") + message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") + message(STATUS "Build site: ${SITE}") + message(STATUS "Build string: ${BUILDNAME}") + get_property(EIGEN_TESTING_SUMMARY GLOBAL PROPERTY EIGEN_TESTING_SUMMARY) + get_property(EIGEN_TESTED_BACKENDS GLOBAL PROPERTY EIGEN_TESTED_BACKENDS) + get_property(EIGEN_MISSING_BACKENDS GLOBAL PROPERTY EIGEN_MISSING_BACKENDS) + message(STATUS "Enabled backends: ${EIGEN_TESTED_BACKENDS}") + message(STATUS "Disabled backends: ${EIGEN_MISSING_BACKENDS}") + + if(EIGEN_DEFAULT_TO_ROW_MAJOR) + message(STATUS "Default order: Row-major") + else() + message(STATUS "Default order: Column-major") + endif() + + if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + message(STATUS "Explicit alignment (hence vectorization) disabled") + elseif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + message(STATUS "Explicit vectorization disabled (alignment kept enabled)") + else() + + message(STATUS "Maximal matrix/vector size: ${EIGEN_TEST_MAX_SIZE}") + + if(EIGEN_TEST_SSE2) + message(STATUS "SSE2: ON") + else() + message(STATUS "SSE2: Using architecture defaults") + endif() + + if(EIGEN_TEST_SSE3) + message(STATUS "SSE3: ON") + else() + message(STATUS "SSE3: Using architecture defaults") + endif() + + if(EIGEN_TEST_SSSE3) + message(STATUS "SSSE3: ON") + else() + message(STATUS "SSSE3: Using architecture defaults") + endif() + + if(EIGEN_TEST_SSE4_1) + message(STATUS "SSE4.1: ON") + else() + message(STATUS "SSE4.1: Using architecture defaults") + endif() + + if(EIGEN_TEST_SSE4_2) + message(STATUS "SSE4.2: ON") + else() + message(STATUS "SSE4.2: Using architecture defaults") + endif() + + if(EIGEN_TEST_AVX) + message(STATUS "AVX: ON") + else() + message(STATUS "AVX: Using architecture defaults") + endif() + + if(EIGEN_TEST_AVX2) + message(STATUS "AVX2: ON") + else() + message(STATUS "AVX2: Using architecture defaults") + endif() + + if(EIGEN_TEST_FMA) + message(STATUS "FMA: ON") + else() + message(STATUS "FMA: Using architecture defaults") + endif() + + if(EIGEN_TEST_AVX512) + message(STATUS "AVX512: ON") + else() + message(STATUS "AVX512: Using architecture defaults") + endif() + + if(EIGEN_TEST_AVX512DQ) + message(STATUS "AVX512DQ: ON") + else() + message(STATUS "AVX512DQ: Using architecture defaults") + endif() + + if(EIGEN_TEST_ALTIVEC) + message(STATUS "Altivec: ON") + else() + message(STATUS "Altivec: Using architecture defaults") + endif() + + if(EIGEN_TEST_VSX) + message(STATUS "VSX: ON") + else() + message(STATUS "VSX: Using architecture defaults") + endif() + + if(EIGEN_TEST_MSA) + message(STATUS "MIPS MSA: ON") + else() + message(STATUS "MIPS MSA: Using architecture defaults") + endif() + + if(EIGEN_TEST_NEON) + message(STATUS "ARM NEON: ON") + else() + message(STATUS "ARM NEON: Using architecture defaults") + endif() + + if(EIGEN_TEST_NEON64) + message(STATUS "ARMv8 NEON: ON") + else() + message(STATUS "ARMv8 NEON: Using architecture defaults") + endif() + + if(EIGEN_TEST_ZVECTOR) + message(STATUS "S390X ZVECTOR: ON") + else() + message(STATUS "S390X ZVECTOR: Using 
architecture defaults") + endif() + + if(EIGEN_TEST_CXX11) + message(STATUS "C++11: ON") + else() + message(STATUS "C++11: OFF") + endif() + + if(EIGEN_TEST_SYCL) + if(EIGEN_SYCL_TRISYCL) + message(STATUS "SYCL: ON (using triSYCL)") + else() + message(STATUS "SYCL: ON (using computeCPP)") + endif() + else() + message(STATUS "SYCL: OFF") + endif() + if(EIGEN_TEST_CUDA) + if(EIGEN_TEST_CUDA_CLANG) + message(STATUS "CUDA: ON (using clang)") + else() + message(STATUS "CUDA: ON (using nvcc)") + endif() + else() + message(STATUS "CUDA: OFF") + endif() + if(EIGEN_TEST_HIP) + message(STATUS "HIP: ON (using hipcc)") + else() + message(STATUS "HIP: OFF") + endif() + + endif() # vectorization / alignment options + + message(STATUS "\n${EIGEN_TESTING_SUMMARY}") + + message(STATUS "************************************************************") +endmacro() + +macro(ei_init_testing) + define_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_TESTS_LIST BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST BRIEF_DOCS " " FULL_DOCS " ") + + set_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS "") + set_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS "") + set_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY "") + set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "") + set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "") + + define_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT BRIEF_DOCS " " FULL_DOCS " ") + define_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT BRIEF_DOCS " " FULL_DOCS " ") + + set_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT "0") + set_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT "0") + + # uncomment anytime you change the ei_get_compilerver_from_cxx_version_string macro + # ei_test_get_compilerver_from_cxx_version_string() +endmacro() + +macro(ei_set_sitename) + # if the sitename is not yet set, try to set it + if(NOT ${SITE} OR ${SITE} STREQUAL "") + set(eigen_computername $ENV{COMPUTERNAME}) + set(eigen_hostname $ENV{HOSTNAME}) + if(eigen_hostname) + set(SITE ${eigen_hostname}) + elseif(eigen_computername) + set(SITE ${eigen_computername}) + endif() + endif() + # in case it is already set, enforce lower case + if(SITE) + string(TOLOWER ${SITE} SITE) + endif() +endmacro() + +macro(ei_get_compilerver VAR) + if(MSVC) + # on windows system, we use a modified CMake script + include(EigenDetermineVSServicePack) + EigenDetermineVSServicePack( my_service_pack ) + + if( my_service_pack ) + set(${VAR} ${my_service_pack}) + else() + set(${VAR} "na") + endif() + elseif(${CMAKE_CXX_COMPILER_ID} MATCHES "PGI") + set(${VAR} "${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}") + else() + # on all other system we rely on ${CMAKE_CXX_COMPILER} + # supporting a "--version" or "/version" flag + + if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} EQUAL "Intel") + set(EIGEN_CXX_FLAG_VERSION "/version") + else() + set(EIGEN_CXX_FLAG_VERSION "--version") + endif() + + execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION} + OUTPUT_VARIABLE eigen_cxx_compiler_version_string OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX REPLACE "^[ \n\r]+" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string}) + string(REGEX REPLACE 
"[\n\r].*" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string}) + + ei_get_compilerver_from_cxx_version_string("${eigen_cxx_compiler_version_string}" CNAME CVER) + set(${VAR} "${CNAME}-${CVER}") + + endif() +endmacro() + +# Extract compiler name and version from a raw version string +# WARNING: if you edit this macro, then please test it by uncommenting +# the testing macro call in ei_init_testing() of the EigenTesting.cmake file. +# See also the ei_test_get_compilerver_from_cxx_version_string macro at the end +# of the file +macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) + # extract possible compiler names + string(REGEX MATCH "g\\+\\+" ei_has_gpp ${VERSTRING}) + string(REGEX MATCH "llvm|LLVM" ei_has_llvm ${VERSTRING}) + string(REGEX MATCH "gcc|GCC" ei_has_gcc ${VERSTRING}) + string(REGEX MATCH "icpc|ICC" ei_has_icpc ${VERSTRING}) + string(REGEX MATCH "clang|CLANG" ei_has_clang ${VERSTRING}) + string(REGEX MATCH "mingw32" ei_has_mingw ${VERSTRING}) + + # combine them + if((ei_has_llvm) AND (ei_has_gpp OR ei_has_gcc)) + set(${CNAME} "llvm-g++") + elseif((ei_has_llvm) AND (ei_has_clang)) + set(${CNAME} "llvm-clang++") + elseif(ei_has_clang) + set(${CNAME} "clang++") + elseif ((ei_has_mingw) AND (ei_has_gpp OR ei_has_gcc)) + set(${CNAME} "mingw32-g++") + elseif(ei_has_icpc) + set(${CNAME} "icpc") + elseif(ei_has_gpp OR ei_has_gcc) + set(${CNAME} "g++") + else() + set(${CNAME} "_") + endif() + + # extract possible version numbers + # first try to extract 3 isolated numbers: + string(REGEX MATCH " [0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING}) + if(NOT eicver) + # try to extract 2 isolated ones: + string(REGEX MATCH " [0-9]+\\.[0-9]+" eicver ${VERSTRING}) + if(NOT eicver) + # try to extract 3: + string(REGEX MATCH "[^0-9][0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING}) + if(NOT eicver) + # try to extract 2: + string(REGEX MATCH "[^0-9][0-9]+\\.[0-9]+" eicver ${VERSTRING}) + if (NOT eicver AND ei_has_mingw) + # try to extract 1 number plus suffix: + string(REGEX MATCH "[^0-9][0-9]+-win32" eicver ${VERSTRING}) + endif() + endif() + endif() + endif() + + if (NOT eicver) + set(eicver " _") + endif() + + string(REGEX REPLACE ".(.*)" "\\1" ${CVER} ${eicver}) + +endmacro() + +macro(ei_get_cxxflags VAR) + set(${VAR} "") + ei_is_64bit_env(IS_64BIT_ENV) + if(EIGEN_TEST_NEON) + set(${VAR} NEON) + elseif(EIGEN_TEST_NEON64) + set(${VAR} NEON) + elseif(EIGEN_TEST_ZVECTOR) + set(${VAR} ZVECTOR) + elseif(EIGEN_TEST_VSX) + set(${VAR} VSX) + elseif(EIGEN_TEST_ALTIVEC) + set(${VAR} ALVEC) + elseif(EIGEN_TEST_FMA) + set(${VAR} FMA) + elseif(EIGEN_TEST_AVX) + set(${VAR} AVX) + elseif(EIGEN_TEST_SSE4_2) + set(${VAR} SSE42) + elseif(EIGEN_TEST_SSE4_1) + set(${VAR} SSE41) + elseif(EIGEN_TEST_SSSE3) + set(${VAR} SSSE3) + elseif(EIGEN_TEST_SSE3) + set(${VAR} SSE3) + elseif(EIGEN_TEST_SSE2 OR IS_64BIT_ENV) + set(${VAR} SSE2) + elseif(EIGEN_TEST_MSA) + set(${VAR} MSA) + endif() + + if(EIGEN_TEST_OPENMP) + if (${VAR} STREQUAL "") + set(${VAR} OMP) + else() + set(${VAR} ${${VAR}}-OMP) + endif() + endif() + + if(EIGEN_DEFAULT_TO_ROW_MAJOR) + if (${VAR} STREQUAL "") + set(${VAR} ROW) + else() + set(${VAR} ${${VAR}}-ROWMAJ) + endif() + endif() +endmacro() + +macro(ei_set_build_string) + ei_get_compilerver(LOCAL_COMPILER_VERSION) + ei_get_cxxflags(LOCAL_COMPILER_FLAGS) + + include(EigenDetermineOSVersion) + DetermineOSVersion(OS_VERSION) + + set(TMP_BUILD_STRING ${OS_VERSION}-${LOCAL_COMPILER_VERSION}) + + if (NOT ${LOCAL_COMPILER_FLAGS} STREQUAL "") + set(TMP_BUILD_STRING 
${TMP_BUILD_STRING}-${LOCAL_COMPILER_FLAGS}) + endif() + + if(EIGEN_TEST_EXTERNAL_BLAS) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-external_blas) + endif() + + ei_is_64bit_env(IS_64BIT_ENV) + if(NOT IS_64BIT_ENV) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-32bit) + else() + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) + endif() + + if(EIGEN_TEST_CXX11) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) + endif() + + if(EIGEN_BUILD_STRING_SUFFIX) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) + endif() + + string(TOLOWER ${TMP_BUILD_STRING} BUILDNAME) +endmacro() + +macro(ei_is_64bit_env VAR) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(${VAR} 1) + elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(${VAR} 0) + else() + message(WARNING "Unsupported pointer size. Please contact the authors.") + endif() +endmacro() + + +# helper macro for testing ei_get_compilerver_from_cxx_version_string +# STR: raw version string +# REFNAME: expected compiler name +# REFVER: expected compiler version +macro(ei_test1_get_compilerver_from_cxx_version_string STR REFNAME REFVER) + ei_get_compilerver_from_cxx_version_string(${STR} CNAME CVER) + if((NOT ${REFNAME} STREQUAL ${CNAME}) OR (NOT ${REFVER} STREQUAL ${CVER})) + message("STATUS ei_get_compilerver_from_cxx_version_string error:") + message("Expected \"${REFNAME}-${REFVER}\", got \"${CNAME}-${CVER}\"") + endif() +endmacro() + +# macro for testing ei_get_compilerver_from_cxx_version_string +# feel free to add more version strings +macro(ei_test_get_compilerver_from_cxx_version_string) + ei_test1_get_compilerver_from_cxx_version_string("g++ (SUSE Linux) 4.5.3 20110428 [gcc-4_5-branch revision 173117]" "g++" "4.5.3") + ei_test1_get_compilerver_from_cxx_version_string("c++ (GCC) 4.5.1 20100924 (Red Hat 4.5.1-4)" "g++" "4.5.1") + ei_test1_get_compilerver_from_cxx_version_string("icpc (ICC) 11.0 20081105" "icpc" "11.0") + ei_test1_get_compilerver_from_cxx_version_string("g++-3.4 (GCC) 3.4.6" "g++" "3.4.6") + ei_test1_get_compilerver_from_cxx_version_string("SUSE Linux clang version 3.0 (branches/release_30 145598) (based on LLVM 3.0)" "llvm-clang++" "3.0") + ei_test1_get_compilerver_from_cxx_version_string("icpc (ICC) 12.0.5 20110719" "icpc" "12.0.5") + ei_test1_get_compilerver_from_cxx_version_string("Apple clang version 2.1 (tags/Apple/clang-163.7.1) (based on LLVM 3.0svn)" "llvm-clang++" "2.1") + ei_test1_get_compilerver_from_cxx_version_string("i686-apple-darwin11-llvm-g++-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)" "llvm-g++" "4.2.1") + ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 4.4.6" "g++" "4.4.6") + ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 2011" "g++" "4.4") + ei_test1_get_compilerver_from_cxx_version_string("x86_64-w64-mingw32-g++ (GCC) 10-win32 20210110" "mingw32-g++" "10-win32") +endmacro() + +# Split all tests listed in EIGEN_TESTS_LIST into num_splits many targets +# named buildtestspartN with N = { 0, ..., num_splits-1}. +# +# The intention behind the existance of this macro is the size of Eigen's +# testsuite. Together with the relativly big compile-times building all tests +# can take a substantial amount of time depending on the available hardware. +# +# The last buildtestspartN target will build possible remaining tests. 
+# +# An example: +# +# EIGEN_TESTS_LIST= [ test1, test2, test3, test4, test5, test6, test7 ] +# +# A call to ei_split_testsuite(3) creates the following targets with dependencies +# +# Target Dependencies +# ------ ------------ +# buildtestspart0 test1, test2 +# buildtestspart1 test3, test4 +# buildtestspart2 test5, test6, test7 +# +macro(ei_split_testsuite num_splits) + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + + # Translate EIGEN_TESTS_LIST into a CMake list + string(REGEX REPLACE "\n" " " EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + separate_arguments(EIGEN_TESTS_LIST) + + set(eigen_test_count "0") + foreach(t IN ITEMS ${EIGEN_TESTS_LIST}) + math(EXPR eigen_test_count "${eigen_test_count}+1") + endforeach() + + # Get number of tests per target + math(EXPR num_tests_per_target "${eigen_test_count}/${num_splits} - ${eigen_test_count}/${num_splits} % 1") + + set(test_idx "0") + math(EXPR target_bound "${num_splits}-1") + foreach(part RANGE "0" "${target_bound}") + # Create target + set(current_target "buildtestspart${part}") + add_custom_target("${current_target}") + math(EXPR upper_bound "${test_idx} + ${num_tests_per_target} - 1") + foreach(test_idx RANGE "${test_idx}" "${upper_bound}") + list(GET EIGEN_TESTS_LIST "${test_idx}" curr_test) + add_dependencies("${current_target}" "${curr_test}") + endforeach() + math(EXPR test_idx "${test_idx} + ${num_tests_per_target}") + endforeach() + + # Handle the possibly remaining tests + math(EXPR test_idx "${num_splits} * ${num_tests_per_target}") + math(EXPR target_bound "${eigen_test_count} - 1") + foreach(test_idx RANGE "${test_idx}" "${target_bound}") + list(GET EIGEN_TESTS_LIST "${test_idx}" curr_test) + add_dependencies("${current_target}" "${curr_test}") + endforeach() +endmacro(ei_split_testsuite num_splits) + +# Defines the custom command buildsmoketests to build a number of tests +# specified in smoke_test_list. +# +# Test in smoke_test_list can be either test targets (e.g. packetmath) or +# subtests targets (e.g. packetmath_2). If any of the test are not available +# in the current configuration they are just skipped. +# +# All tests added via this macro are labeled with the smoketest label. This +# allows running smoketests only using ctest. +# +# Smoke tests are intended to be run before the whole test suite is invoked, +# e.g., to smoke test patches. 
+macro(ei_add_smoke_tests smoke_test_list) + # Set the build target to build smoketests + set(buildtarget "buildsmoketests") + add_custom_target("${buildtarget}") + + # Get list of all tests and translate it into a CMake list + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + string(REGEX REPLACE "\n" " " EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + separate_arguments(EIGEN_TESTS_LIST) + + # Check if the test in smoke_test_list is a currently valid test target + foreach(test IN ITEMS ${smoke_test_list}) + # Add tests in smoke_test_list to our smoke test target but only if the test + # is currently available, i.e., is in EIGEN_SUBTESTS_LIST + if ("${test}" IN_LIST EIGEN_TESTS_LIST) + add_dependencies("${buildtarget}" "${test}") + # In the case of a test we match all subtests + set(ctest_regex "${ctest_regex}^${test}_[0-9]+$$|") + endif() + endforeach() + + # Get list of all subtests and translate it into a CMake list + get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST) + string(REGEX REPLACE "\n" " " EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") + set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") + separate_arguments(EIGEN_SUBTESTS_LIST) + + # Check if the test in smoke_test_list is a currently valid subtest target + foreach(test IN ITEMS ${smoke_test_list}) + # Add tests in smoke_test_list to our smoke test target but only if the test + # is currently available, i.e., is in EIGEN_SUBTESTS_LIST + if ("${test}" IN_LIST EIGEN_SUBTESTS_LIST) + add_dependencies("${buildtarget}" "${test}") + # Add label smoketest to be able to run smoketests using ctest + get_property(test_labels TEST ${test} PROPERTY LABELS) + set_property(TEST ${test} PROPERTY LABELS "${test_labels};smoketest") + endif() + endforeach() +endmacro(ei_add_smoke_tests) diff --git a/external/eigen-3.4.0/cmake/EigenUninstall.cmake b/external/eigen-3.4.0/cmake/EigenUninstall.cmake new file mode 100644 index 0000000..5e63c98 --- /dev/null +++ b/external/eigen-3.4.0/cmake/EigenUninstall.cmake @@ -0,0 +1,40 @@ +################ CMake Uninstall Template ####################### +# CMake Template file for uninstallation of files +# mentioned in 'install_manifest.txt' +# +# Used by uinstall target +################################################################# + +set(MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/install_manifest.txt") + +if(EXISTS ${MANIFEST}) + message(STATUS "============== Uninstalling Eigen ===================") + + file(STRINGS ${MANIFEST} files) + foreach(file ${files}) + if(EXISTS ${file}) + message(STATUS "Removing file: '${file}'") + + execute_process( + COMMAND ${CMAKE_COMMAND} -E remove ${file} + OUTPUT_VARIABLE rm_out + RESULT_VARIABLE rm_retval + ) + + if(NOT "${rm_retval}" STREQUAL 0) + message(FATAL_ERROR "Failed to remove file: '${file}'.") + endif() + else() + message(STATUS "File '${file}' does not exist.") + endif() + endforeach() + + message(STATUS "========== Finished Uninstalling Eigen ==============") +else() + message(STATUS "Cannot find install manifest: '${MANIFEST}'") + message(STATUS "Probably make install has not been performed") + message(STATUS " or install_manifest.txt has been deleted.") +endif() + + + diff --git a/external/eigen-3.4.0/cmake/FindAdolc.cmake b/external/eigen-3.4.0/cmake/FindAdolc.cmake new file mode 100644 index 0000000..13c59fc --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindAdolc.cmake @@ -0,0 +1,20 @@ + +if (ADOLC_INCLUDES AND ADOLC_LIBRARIES) + set(ADOLC_FIND_QUIETLY TRUE) +endif () + 
+find_path(ADOLC_INCLUDES + NAMES adolc/adtl.h + PATHS $ENV{ADOLCDIR} $ENV{ADOLCDIR}/include ${INCLUDE_INSTALL_DIR} +) + +find_library(ADOLC_LIBRARIES + adolc + PATHS $ENV{ADOLCDIR} ${LIB_INSTALL_DIR} + PATH_SUFFIXES lib lib64) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Adolc DEFAULT_MSG + ADOLC_INCLUDES ADOLC_LIBRARIES) + +mark_as_advanced(ADOLC_INCLUDES ADOLC_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindBLAS.cmake b/external/eigen-3.4.0/cmake/FindBLAS.cmake new file mode 100644 index 0000000..1bb8f19 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindBLAS.cmake @@ -0,0 +1,1407 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS library +# This module finds an installed fortran library that implements the BLAS +# linear-algebra interface (see http://www.netlib.org/blas/). +# The list of libraries searched for is taken +# from the autoconf macro file, acx_blas.m4 (distributed at +# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). +# +# This module sets the following variables: +# BLAS_FOUND - set to true if a library implementing the BLAS interface +# is found +# BLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l +# and -L). +# BLAS_COMPILER_FLAGS - uncached list of required compiler flags (including -I for mkl headers). +# BLAS_LIBRARIES - uncached list of libraries (using full path name) to +# link against to use BLAS +# BLAS95_LIBRARIES - uncached list of libraries (using full path name) +# to link against to use BLAS95 interface +# BLAS95_FOUND - set to true if a library implementing the BLAS f95 interface +# is found +# BLA_STATIC if set on this determines what kind of linkage we do (static) +# BLA_VENDOR if set checks only the specified vendor, if not set checks +# all the possibilities +# BLAS_VENDOR_FOUND stores the BLAS vendor found +# BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DBLAS_DIR=path/to/blas): +# BLAS_DIR - Where to find the base directory of blas +# BLAS_INCDIR - Where to find the header files +# BLAS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR +# For MKL case and if no paths are given as hints, we will try to use the MKLROOT +# environment variable +# BLAS_VERBOSE Print some additional information during BLAS libraries detection +########## +### List of vendors (BLA_VENDOR) valid in this module +########## List of vendors (BLA_VENDOR) valid in this module +## Open (for OpenBlas), Eigen (for EigenBlas), Goto, ATLAS PhiPACK, +##  CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT +## Intel10_32 (intel mkl v10 32 bit), Intel10_64lp (intel mkl v10 64 bit,lp thread model, lp64 model), +## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), +## Intel( older versions of mkl 32 and 64 bit), +##  ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic +# C/CXX should be enabled to use Intel mkl +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - BLAS 
libraries can be detected from different ways +# Here is the order of precedence: +# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# + +#============================================================================= +# Copyright 2007-2009 Kitware, Inc. +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +## Some macros to print status when search for headers and libs +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_Status _libname _lib_to_find) + + # save _libname upper/lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_LIBDIR) + message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + message("${Yellow}${_lib_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" + "are defined so that we look for ${_lib_to_find} in" + "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," + "Mac: DYLD_LIBRARY_PATH," + "CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES," + "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") + if(_lib_env) + message("${Yellow}${_lib_to_find} has not been found in" + "${_lib_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldYellow}Please indicate where to find ${_lib_to_find}. 
You have three options:\n" + "- Option 1: Provide the Installation directory of BLAS library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_CheckFunc_Status _name) + + # save _libname upper/lower case + string(TOUPPER ${_name} FUNCNAME) + string(TOLOWER ${_name} funcname) + + # print status + #message(" ") + message("${Red}Libs have been found but check of symbol ${_name} failed " + "with following libraries ${ARGN}${ColourReset}") + message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" + "to figure out why it fails${ColourReset}") + #message(" ") + +endmacro() + +if (NOT BLAS_FOUND) + set(BLAS_DIR "" CACHE PATH "Installation directory of BLAS library") + if (NOT BLAS_FIND_QUIETLY) + message(STATUS "A cache variable, namely BLAS_DIR, has been set to specify the install directory of BLAS") + endif() +endif() + +option(BLAS_VERBOSE "Print some additional information during BLAS libraries detection" OFF) +mark_as_advanced(BLAS_VERBOSE) + +include(CheckFunctionExists) +include(CheckFortranFunctionExists) +include(CMakeFindDependencyMacro) + +set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + +# Check the language being used +get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) +if( _LANGUAGES_ MATCHES Fortran AND CMAKE_Fortran_COMPILER) + set( _CHECK_FORTRAN TRUE ) +elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) + set( _CHECK_FORTRAN FALSE ) +else() + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") + return() + endif() +endif() + +macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. 
+ + set(_libdir ${ARGN}) + + set(_libraries_work TRUE) + set(${LIBRARIES}) + set(_combined_name) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_LIBDIR "$ENV{BLAS_LIBDIR}") + if (NOT _libdir) + if (BLAS_LIBDIR) + list(APPEND _libdir "${BLAS_LIBDIR}") + elseif (BLAS_DIR) + list(APPEND _libdir "${BLAS_DIR}") + list(APPEND _libdir "${BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${BLAS_DIR}/lib64") + list(APPEND _libdir "${BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${BLAS_DIR}/lib32") + list(APPEND _libdir "${BLAS_DIR}/lib/ia32") + endif() + elseif(ENV_BLAS_LIBDIR) + list(APPEND _libdir "${ENV_BLAS_LIBDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _libdir "${ENV_BLAS_DIR}") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib64") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${ENV_BLAS_DIR}/lib32") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/ia32") + endif() + else() + if (ENV_MKLROOT) + list(APPEND _libdir "${ENV_MKLROOT}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_MKLROOT}/lib64") + list(APPEND _libdir "${ENV_MKLROOT}/lib/intel64") + else() + list(APPEND _libdir "${ENV_MKLROOT}/lib32") + list(APPEND _libdir "${ENV_MKLROOT}/lib/ia32") + endif() + endif() + if (WIN32) + string(REPLACE ":" ";" _libdir2 "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir2 "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir2 "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${_libdir2}") + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif () + + if (BLAS_VERBOSE) + message("${Cyan}Try to find BLAS libraries: ${_list}") + endif () + + foreach(_library ${_list}) + set(_combined_name ${_combined_name}_${_library}) + + if(_libraries_work) + if (BLA_STATIC) + if (WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () + if (APPLE) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else () + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () + else () + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif () + endif () + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + HINTS ${_libdir} + NO_DEFAULT_PATH + ) + mark_as_advanced(${_prefix}_${_library}_LIBRARY) + # Print status if not found + # ------------------------- + if (NOT ${_prefix}_${_library}_LIBRARY AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_Status(blas ${_library} ${_libdir}) + endif () + set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif() + endforeach() + + if(_libraries_work) + # Test this combination of libraries. + if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BLA_STATIC) + list(INSERT ${LIBRARIES} 0 "-Wl,--start-group") + list(APPEND ${LIBRARIES} "-Wl,--end-group") + endif() + set(CMAKE_REQUIRED_LIBRARIES "${_flags};${${LIBRARIES}};${_thread}") + set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") + if (BLAS_VERBOSE) + message("${Cyan}BLAS libs found for BLA_VENDOR ${BLA_VENDOR}." 
+ "Try to compile symbol ${_name} with following libraries:" + "${CMAKE_REQUIRED_LIBRARIES}") + endif () + if(NOT BLAS_FOUND) + unset(${_prefix}${_combined_name}_WORKS CACHE) + endif() + if (_CHECK_FORTRAN) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + endif() + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) + endif() + mark_as_advanced(${_prefix}${_combined_name}_WORKS) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + # Print status if not found + # ------------------------- + if (NOT _libraries_work AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_CheckFunc_Status(${_name} ${CMAKE_REQUIRED_LIBRARIES}) + endif () + set(CMAKE_REQUIRED_LIBRARIES) + endif() + + if(_libraries_work) + set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) + else() + set(${LIBRARIES} FALSE) + endif() + +endmacro() + + +set(BLAS_LINKER_FLAGS) +set(BLAS_LIBRARIES) +set(BLAS95_LIBRARIES) +if ($ENV{BLA_VENDOR} MATCHES ".+") + set(BLA_VENDOR $ENV{BLA_VENDOR}) +else () + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() +endif () + +#BLAS in intel mkl 10 library? (em64t 64bit) +if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_INCDIR "$ENV{BLAS_INCDIR}") + if(ENV_BLAS_INCDIR) + list(APPEND _inc_env "${ENV_BLAS_INCDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _inc_env "${ENV_BLAS_DIR}") + list(APPEND _inc_env "${ENV_BLAS_DIR}/include") + else() + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + # system variables + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + + # Try to find the fftw header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(BLAS_INCDIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES "include") + else() + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_DIRS) + + # If found, add path to cmake variable + # 
------------------------------------ + if (BLAS_mkl.h_DIRS) + set(BLAS_INCLUDE_DIRS "${BLAS_mkl.h_DIRS}") + else () + set(BLAS_INCLUDE_DIRS "BLAS_INCLUDE_DIRS-NOTFOUND") + if(NOT BLAS_FIND_QUIETLY) + message(STATUS "Looking for BLAS -- mkl.h not found") + endif() + endif() + + if (WIN32) + string(REPLACE ":" ";" _libdir "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # libiomp5 + # -------- + set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") + find_library(OMP_iomp5_LIBRARY + NAMES iomp5 + HINTS ${_libdir} + ) + mark_as_advanced(OMP_iomp5_LIBRARY) + set(OMP_LIB "") + # libgomp + # ------- + set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") + find_library(OMP_gomp_LIBRARY + NAMES gomp + HINTS ${_libdir} + ) + mark_as_advanced(OMP_gomp_LIBRARY) + # choose one or another depending on the compilo + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (OMP_gomp_LIBRARY) + set(OMP_LIB "${OMP_gomp_LIBRARY}") + endif() + else() + if (OMP_iomp5_LIBRARY) + set(OMP_LIB "${OMP_iomp5_LIBRARY}") + endif() + endif() + + if (UNIX AND NOT WIN32) + # m + find_library(M_LIBRARY + NAMES m + HINTS ${_libdir}) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + set(LM "-lm") + else() + set(LM "") + endif() + # Fortran + set(LGFORTRAN "") + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + set(LGFORTRAN "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + set(LGFORTRAN "{FORTRAN_ifcore_LIBRARY}") + endif() + endif() + set(BLAS_COMPILER_FLAGS "") + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_COMPILER_FLAGS "-openmp") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") + endif() + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (BLA_VENDOR STREQUAL "Intel10_32") + list(APPEND BLAS_COMPILER_FLAGS "-m32") + else() + list(APPEND BLAS_COMPILER_FLAGS "-m64") + endif() + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + list(APPEND OMP_LIB "-ldl") + endif() + if (ENV_MKLROOT) + list(APPEND BLAS_COMPILER_FLAGS "-I${ENV_MKLROOT}/include") + endif() + endif() + + set(additional_flags "") + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(additional_flags "-Wl,--no-as-needed") + endif() + endif () + + if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) + if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) + find_dependency(Threads) + else() + find_dependency(Threads REQUIRED) + endif() + + set(BLAS_SEARCH_LIBS "") + + if(BLA_F95) + + set(BLAS_mkl_SEARCH_SYMBOL SGEMM) + set(_LIBRARIES BLAS95_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR 
STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95_lp64${BLAS_mkl_DLL_SUFFIX} mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else () + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + endif () + + else () + + set(BLAS_mkl_SEARCH_SYMBOL sgemm) + set(_LIBRARIES BLAS_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else () + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND 
BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + #older vesions of intel mkl libs + if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl") + list(APPEND BLAS_SEARCH_LIBS + "mkl_ia32") + list(APPEND BLAS_SEARCH_LIBS + "mkl_em64t") + endif () + endif () + + endif () + + foreach (IT ${BLAS_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if (${_LIBRARIES}) + else () + check_fortran_libraries( + ${_LIBRARIES} + BLAS + ${BLAS_mkl_SEARCH_SYMBOL} + "${additional_flags}" + "${SEARCH_LIBS}" + "${OMP_LIB};${CMAKE_THREAD_LIBS_INIT};${LM}" + ) + if(_LIBRARIES) + set(BLAS_LINKER_FLAGS "${additional_flags}") + endif() + endif() + endforeach () + if(NOT BLAS_FIND_QUIETLY) + if(${_LIBRARIES}) + message(STATUS "Looking for MKL BLAS: found") + else() + message(STATUS "Looking for MKL BLAS: not found") + endif() + endif() + if (${_LIBRARIES} AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Intel MKL") + endif() + endif () + endif() +endif () + + +if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "goto2" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Goto BLAS: found") + else() + message(STATUS "Looking for Goto BLAS: not found") + endif() + endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Goto") + endif() + +endif () + + +# OpenBlas +if (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # openblas (http://www.openblas.net/) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Open BLAS: found") + else() + message(STATUS "Looking for Open BLAS: not found") + endif() + endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Openblas") + endif() + +endif () + + +# EigenBlas +if (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "eigen_blas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() + endif() + endif() + + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "eigen_blas_static" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() + endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Eigen") + 
endif() + +endif () + + +if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "f77blas;atlas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Atlas BLAS: found") + else() + message(STATUS "Looking for Atlas BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Atlas") + endif() + +endif () + + +# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "sgemm;dgemm;blas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for PhiPACK BLAS: found") + else() + message(STATUS "Looking for PhiPACK BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "PhiPACK") + endif() + +endif () + + +# BLAS in Alpha CXML library? +if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "cxml" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for CXML BLAS: found") + else() + message(STATUS "Looking for CXML BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "CXML") + endif() + +endif () + + +# BLAS in Alpha DXML library? (now called CXML, see above) +if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "dxml" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for DXML BLAS: found") + else() + message(STATUS "Looking for DXML BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "DXML") + endif() + +endif () + + +# BLAS in Sun Performance library? +if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "-xlic_lib=sunperf" + "sunperf;sunmath" + "" + ) + if(BLAS_LIBRARIES) + set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") + endif() + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SunPerf BLAS: found") + else() + message(STATUS "Looking for SunPerf BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() + +endif () + + +# BLAS in SCSL library? (SGI/Cray Scientific Library) +if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "scsl" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SCSL BLAS: found") + else() + message(STATUS "Looking for SCSL BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() + +endif () + + +# BLAS in SGIMATH library? 
+if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "complib.sgimath" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SGIMATH BLAS: found") + else() + message(STATUS "Looking for SGIMATH BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SGIMATH") + endif() + +endif () + + +# BLAS in IBM ESSL library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "essl;xlfmath;xlf90_r;blas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL BLAS: found") + else() + message(STATUS "Looking for IBM ESSL BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL") + endif() + +endif () + +# BLAS in IBM ESSL_MT library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSLMT" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "esslsmp;xlsmp;xlfmath;xlf90_r;blas" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL MT BLAS: found") + else() + message(STATUS "Looking for IBM ESSL MT BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL MT") + endif() + +endif () + + +#BLAS in acml library? +if (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") + + if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))) + + # try to find acml in "standard" paths + if( WIN32 ) + file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" ) + else() + file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" ) + endif() + if( WIN32 ) + file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" ) + else() + file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" ) + endif() + list(GET _ACML_ROOT 0 _ACML_ROOT) + list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) + + if( _ACML_ROOT ) + + get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH ) + if( SIZEOF_INTEGER EQUAL 8 ) + set( _ACML_PATH_SUFFIX "_int64" ) + else() + set( _ACML_PATH_SUFFIX "" ) + endif() + if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) + set( _ACML_COMPILER32 "ifort32" ) + set( _ACML_COMPILER64 "ifort64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" ) + set( _ACML_COMPILER32 "sun32" ) + set( _ACML_COMPILER64 "sun64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" ) + set( _ACML_COMPILER32 "pgi32" ) + if( WIN32 ) + set( _ACML_COMPILER64 "win64" ) + else() + set( _ACML_COMPILER64 "pgi64" ) + endif() + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" ) + # 32 bit builds not supported on Open64 but for code simplicity + # We'll just use the same directory twice + set( _ACML_COMPILER32 "open64_64" ) + set( _ACML_COMPILER64 "open64_64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" ) + set( _ACML_COMPILER32 "nag32" ) + set( _ACML_COMPILER64 "nag64" ) + else() + set( _ACML_COMPILER32 "gfortran32" ) + set( _ACML_COMPILER64 "gfortran64" ) + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + set(_ACML_MP_LIB_DIRS + 
"${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" ) + else() + set(_ACML_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" ) + endif() + + endif() + + elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) + + set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) + + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + elseif( BLA_VENDOR STREQUAL "ACML_GPU" ) + foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + else() + foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} ) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + endif() + + # Either acml or acml_mp should be in LD_LIBRARY_PATH but not both + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml;acml_mv" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() + endif() + endif() + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml_mp;acml_mv" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() + endif() + endif() + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml;acml_mv;CALBLAS" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "ACML") + endif() + +endif () # ACML + + +# Apple BLAS library? +if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "Accelerate" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Apple BLAS: found") + else() + message(STATUS "Looking for Apple BLAS: not found") + endif() + endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Apple Accelerate") + endif() + +endif () + + +if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + + if ( NOT BLAS_LIBRARIES ) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "vecLib" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for NAS BLAS: found") + else() + message(STATUS "Looking for NAS BLAS: not found") + endif() + endif() + endif () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "NAS") + endif() + +endif () + + +# Generic BLAS library? 
+if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + + set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") + foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) + if (BLAS_LIBRARIES) + else () + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "${SEARCH_LIB}" + "${LGFORTRAN}" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Generic BLAS: found") + else() + message(STATUS "Looking for Generic BLAS: not found") + endif() + endif() + endif() + endforeach () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Netlib or other Generic libblas") + endif() + +endif () + + +if(BLA_F95) + + if(BLAS95_LIBRARIES) + set(BLAS95_FOUND TRUE) + else() + set(BLAS95_FOUND FALSE) + endif() + + if(NOT BLAS_FIND_QUIETLY) + if(BLAS95_FOUND) + message(STATUS "A library with BLAS95 API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") + else() + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas 95 libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." + "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with BLAS95 API not found. Please specify library location.") + else() + message(STATUS + "A library with BLAS95 API not found. Please specify library location.") + endif() + endif() + endif() + + set(BLAS_FOUND TRUE) + set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") + +else() + + if(BLAS_LIBRARIES) + set(BLAS_FOUND TRUE) + else() + set(BLAS_FOUND FALSE) + endif() + + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_FOUND) + message(STATUS "A library with BLAS API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") + else() + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." 
+ "\nList of possible BLAS vendors: Goto, ATLAS, PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel (older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with BLAS API not found. Please specify library location.") + else() + message(STATUS + "A library with BLAS API not found. Please specify library location.") + endif() + endif() + +endif() + +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) + +if (BLAS_FOUND) + list(GET BLAS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(BLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of BLAS library" FORCE) + else() + set(BLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of BLAS library" FORCE) + endif() +endif() +mark_as_advanced(BLAS_DIR) +mark_as_advanced(BLAS_DIR_FOUND) diff --git a/external/eigen-3.4.0/cmake/FindBLASEXT.cmake b/external/eigen-3.4.0/cmake/FindBLASEXT.cmake new file mode 100644 index 0000000..69a9418 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindBLASEXT.cmake @@ -0,0 +1,384 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS EXTENDED for MORSE projects: find include dirs and libraries +# +# This module allows finding BLAS libraries by calling the official FindBLAS module +# and handles the creation of different library lists whether the user wishes to link +# with a sequential BLAS or a multithreaded one (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). +# BLAS is detected with a FindBLAS call; then, if the BLAS vendor is Intel10_64lp, ACML +# or IBMESSLMT, the module attempts to find the corresponding multithreaded libraries. +# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# BLAS_INCLUDE_DIRS - BLAS include directories +# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries +# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) +# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.)
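+
+# Illustrative usage sketch from a consuming project (hypothetical target
+# name "mytarget"; assumes this cmake directory has been added to
+# CMAKE_MODULE_PATH):
+#
+#   find_package(BLASEXT)
+#   if(BLASEXT_FOUND)
+#     if(BLAS_INCLUDE_DIRS)
+#       target_include_directories(mytarget PRIVATE ${BLAS_INCLUDE_DIRS})
+#     endif()
+#     # prefer the multithreaded list when it was detected, otherwise fall
+#     # back to the sequential one
+#     if(BLAS_PAR_LIBRARIES)
+#       target_link_libraries(mytarget PRIVATE ${BLAS_PAR_LIBRARIES})
+#     else()
+#       target_link_libraries(mytarget PRIVATE ${BLAS_SEQ_LIBRARIES})
+#     endif()
+#   endif()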
+ +# macro to factorize this call +include(CMakeFindDependencyMacro) +macro(find_package_blas) + if(BLASEXT_FIND_REQUIRED) + if(BLASEXT_FIND_QUIETLY) + find_dependency(BLAS REQUIRED QUIET) + else() + find_dependency(BLAS REQUIRED) + endif() + else() + if(BLASEXT_FIND_QUIETLY) + find_dependency(BLAS QUIET) + else() + find_dependency(BLAS) + endif() + endif() +endmacro() + +# add a cache variable to let the user specify the BLAS vendor +set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor: + Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, + Intel10_32 (intel mkl v10 32 bit), + Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), + Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model), + Intel( older versions of mkl 32 and 64 bit), + ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + +if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "In FindBLASEXT") + message(STATUS "If you want to force the use of one specific library, " + "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name" + "\n at cmake configure.") + message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, " + "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit)," + "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model)," + "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "\n Intel( older versions of mkl 32 and 64 bit)," + "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") +endif() + +if (NOT BLAS_FOUND) + # First try to detect two cases: + # 1: only SEQ libs are handled + # 2: both SEQ and PAR libs are handled + find_package_blas() +endif () + +# detect the cases where SEQ and PAR libs are handled +if(BLA_VENDOR STREQUAL "All" AND + (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY) + ) + set(BLA_VENDOR "Intel") + if(BLAS_mkl_intel_LIBRARY) + set(BLA_VENDOR "Intel10_32") + endif() + if(BLAS_mkl_intel_lp64_LIBRARY) + set(BLA_VENDOR "Intel10_64lp") + endif() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the MKL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) + set(BLA_VENDOR "ACML") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ACML." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) + set(BLA_VENDOR "IBMESSL") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." 
+ "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +endif() + +# Intel case +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the BLAS vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + # find mkl.h inside known include paths + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + if(BLAS_INCDIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES include) + else() + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND MORSE_VERBOSE) + # Print_Find_Header_Status(blas mkl.h) + #endif () + set(BLAS_INCLUDE_DIRS "") + if(BLAS_mkl.h_INCLUDE_DIRS) + list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + # if Intel 10 64 bit -> look for sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the sequential version Intel10_64lp_seq") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "Intel10_64lp") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the multithreaded version Intel10_64lp") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + else() + + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + endif() + + # ACML case +elseif(BLA_VENDOR MATCHES "ACML*") + + ## look for the sequential version + set(BLA_VENDOR "ACML") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "ACML_MP") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + # IBMESSL case +elseif(BLA_VENDOR MATCHES 
"IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + +else() + + if(BLAS_FOUND) + # define the SEQ libs as the BLAS_LIBRARIES + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + +endif() + + +if(BLAS_SEQ_LIBRARIES) + set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(BLAS) +set(BLAS_LIBRARY_DIRS "") +string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") +foreach(blas_lib ${BLAS_LIBRARIES}) + if (EXISTS "${blas_lib}") + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + else() + string(REPLACE "-L" "" blas_lib "${blas_lib}") + if (EXISTS "${blas_lib}") + list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) + else() + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + if (EXISTS "${a_blas_lib_dir}") + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + endif() + endif() + endif() +endforeach() +if (BLAS_LIBRARY_DIRS) + list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) +endif () + +# check that BLAS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() + else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ESSL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and 
BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLASEXT DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) +endif() + +# Callers expect BLAS_FOUND to be set as well. +set(BLAS_FOUND BLASEXT_FOUND) diff --git a/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake b/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake new file mode 100644 index 0000000..e470cb2 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake @@ -0,0 +1,89 @@ +# CHOLMOD lib usually requires linking to a blas and lapack library. +# It is up to the user of this module to find a BLAS and link to it. + +if (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) + set(CHOLMOD_FIND_QUIETLY TRUE) +endif () + +find_path(CHOLMOD_INCLUDES + NAMES + cholmod.h + PATHS + $ENV{CHOLMODDIR} + ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES + suitesparse + ufsparse +) + +find_library(CHOLMOD_LIBRARIES cholmod PATHS $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + +if(CHOLMOD_LIBRARIES) + + get_filename_component(CHOLMOD_LIBDIR ${CHOLMOD_LIBRARIES} PATH) + + find_library(AMD_LIBRARY amd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (AMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${AMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif() + +if(CHOLMOD_LIBRARIES) + + find_library(COLAMD_LIBRARY colamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (COLAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${COLAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif() + +if(CHOLMOD_LIBRARIES) + + find_library(CAMD_LIBRARY camd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (CAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif() + +if(CHOLMOD_LIBRARIES) + + find_library(CCOLAMD_LIBRARY ccolamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (CCOLAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CCOLAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif() + +if(CHOLMOD_LIBRARIES) + + find_library(CHOLMOD_METIS_LIBRARY metis PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (CHOLMOD_METIS_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CHOLMOD_METIS_LIBRARY}) + endif () + +endif() + +if(CHOLMOD_LIBRARIES) + + find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) + if (SUITESPARSE_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${SUITESPARSE_LIBRARY}) + endif () + +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CHOLMOD DEFAULT_MSG + CHOLMOD_INCLUDES CHOLMOD_LIBRARIES) + +mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/FindComputeCpp.cmake b/external/eigen-3.4.0/cmake/FindComputeCpp.cmake new file mode 100644 index 0000000..1c271f0 --- /dev/null +++ 
b/external/eigen-3.4.0/cmake/FindComputeCpp.cmake @@ -0,0 +1,455 @@ +#.rst: +# FindComputeCpp +#--------------- +# +# Copyright 2016-2018 Codeplay Software Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use these files except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +######################### +# FindComputeCpp.cmake +######################### +# +# Tools for finding and building with ComputeCpp. +# +# User must define ComputeCpp_DIR pointing to the ComputeCpp +# installation. +# +# Latest version of this file can be found at: +# https://github.com/codeplaysoftware/computecpp-sdk + +cmake_minimum_required(VERSION 3.4.3) +include(FindPackageHandleStandardArgs) +include(ComputeCppIRMap) + +set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++") +separate_arguments(COMPUTECPP_USER_FLAGS) +mark_as_advanced(COMPUTECPP_USER_FLAGS) + +set(COMPUTECPP_BITCODE "spir64" CACHE STRING + "Bitcode type to use as SYCL target in compute++") +mark_as_advanced(COMPUTECPP_BITCODE) + +include(CMakeFindDependencyMacro) +find_dependency(OpenCL REQUIRED) + +# Find ComputeCpp package + +if(DEFINED ComputeCpp_DIR) + set(computecpp_find_hint ${ComputeCpp_DIR}) +elseif(DEFINED ENV{COMPUTECPP_DIR}) + set(computecpp_find_hint $ENV{COMPUTECPP_DIR}) +endif() + +# Used for running executables on the host +set(computecpp_host_find_hint ${computecpp_find_hint}) + +if(CMAKE_CROSSCOMPILING) + # ComputeCpp_HOST_DIR is used to find executables that are run on the host + if(DEFINED ComputeCpp_HOST_DIR) + set(computecpp_host_find_hint ${ComputeCpp_HOST_DIR}) + elseif(DEFINED ENV{COMPUTECPP_HOST_DIR}) + set(computecpp_host_find_hint $ENV{COMPUTECPP_HOST_DIR}) + endif() +endif() + +find_program(ComputeCpp_DEVICE_COMPILER_EXECUTABLE compute++ + HINTS ${computecpp_host_find_hint} + PATH_SUFFIXES bin + NO_SYSTEM_ENVIRONMENT_PATH) + +find_program(ComputeCpp_INFO_EXECUTABLE computecpp_info + HINTS ${computecpp_host_find_hint} + PATH_SUFFIXES bin + NO_SYSTEM_ENVIRONMENT_PATH) + +find_library(COMPUTECPP_RUNTIME_LIBRARY + NAMES ComputeCpp ComputeCpp_vs2015 + HINTS ${computecpp_find_hint} + PATH_SUFFIXES lib + DOC "ComputeCpp Runtime Library") + +find_library(COMPUTECPP_RUNTIME_LIBRARY_DEBUG + NAMES ComputeCpp_d ComputeCpp ComputeCpp_vs2015_d + HINTS ${computecpp_find_hint} + PATH_SUFFIXES lib + DOC "ComputeCpp Debug Runtime Library") + +find_path(ComputeCpp_INCLUDE_DIRS + NAMES "CL/sycl.hpp" + HINTS ${computecpp_find_hint}/include + DOC "The ComputeCpp include directory") +get_filename_component(ComputeCpp_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS} ABSOLUTE) + +get_filename_component(computecpp_canonical_root_dir "${ComputeCpp_INCLUDE_DIRS}/.." 
ABSOLUTE) +set(ComputeCpp_ROOT_DIR "${computecpp_canonical_root_dir}" CACHE PATH + "The root of the ComputeCpp install") + +if(NOT ComputeCpp_INFO_EXECUTABLE) + message(WARNING "Can't find computecpp_info - check ComputeCpp_DIR") +else() + execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-version" + OUTPUT_VARIABLE ComputeCpp_VERSION + RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") + message(WARNING "Package version - Error obtaining version!") + endif() + + execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-is-supported" + OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED + RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") + message(WARNING "platform - Error checking platform support!") + else() + mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) + if (COMPUTECPP_PLATFORM_IS_SUPPORTED) + message(STATUS "platform - your system can support ComputeCpp") + else() + message(STATUS "platform - your system is not officially supported") + endif() + endif() +endif() + +find_package_handle_standard_args(ComputeCpp + REQUIRED_VARS ComputeCpp_ROOT_DIR + ComputeCpp_DEVICE_COMPILER_EXECUTABLE + ComputeCpp_INFO_EXECUTABLE + COMPUTECPP_RUNTIME_LIBRARY + COMPUTECPP_RUNTIME_LIBRARY_DEBUG + ComputeCpp_INCLUDE_DIRS + VERSION_VAR ComputeCpp_VERSION) +mark_as_advanced(ComputeCpp_ROOT_DIR + ComputeCpp_DEVICE_COMPILER_EXECUTABLE + ComputeCpp_INFO_EXECUTABLE + COMPUTECPP_RUNTIME_LIBRARY + COMPUTECPP_RUNTIME_LIBRARY_DEBUG + ComputeCpp_INCLUDE_DIRS + ComputeCpp_VERSION) + +if(NOT ComputeCpp_FOUND) + return() +endif() + +list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -O2 -mllvm -inline-threshold=1000 -intelspirmetadata) +mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS) + +if(CMAKE_CROSSCOMPILING) + if(NOT COMPUTECPP_DONT_USE_TOOLCHAIN) + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --gcc-toolchain=${COMPUTECPP_TOOLCHAIN_DIR}) + endif() + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --sysroot=${COMPUTECPP_SYSROOT_DIR}) + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -target ${COMPUTECPP_TARGET_TRIPLE}) +endif() + +list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -sycl-target ${COMPUTECPP_BITCODE}) +message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") + +include(ComputeCppCompilerChecks) + +if(NOT TARGET OpenCL::OpenCL) + add_library(OpenCL::OpenCL UNKNOWN IMPORTED) + set_target_properties(OpenCL::OpenCL PROPERTIES + IMPORTED_LOCATION "${OpenCL_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET ComputeCpp::ComputeCpp) + add_library(ComputeCpp::ComputeCpp UNKNOWN IMPORTED) + set_target_properties(ComputeCpp::ComputeCpp PROPERTIES + IMPORTED_LOCATION_DEBUG "${COMPUTECPP_RUNTIME_LIBRARY_DEBUG}" + IMPORTED_LOCATION_RELWITHDEBINFO "${COMPUTECPP_RUNTIME_LIBRARY}" + IMPORTED_LOCATION "${COMPUTECPP_RUNTIME_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${ComputeCpp_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "OpenCL::OpenCL" + ) +endif() + +# This property allows targets to specify that their sources should be +# compiled with the integration header included after the user's +# sources, not before (e.g. 
when an enum is used in a kernel name, this +# is not technically valid SYCL code but can work with ComputeCpp) +define_property( + TARGET PROPERTY COMPUTECPP_INCLUDE_AFTER + BRIEF_DOCS "Include integration header after user source" + FULL_DOCS "Changes compiler arguments such that the source file is + actually the integration header, and the .cpp file is included on + the command line so that it is seen by the compiler first. Enables + non-standards-conformant SYCL code to compile with ComputeCpp." +) +define_property( + TARGET PROPERTY INTERFACE_COMPUTECPP_FLAGS + BRIEF_DOCS "Interface compile flags to provide compute++" + FULL_DOCS "Set additional compile flags to pass to compute++ when compiling + any target which links to this one." +) +define_property( + SOURCE PROPERTY COMPUTECPP_SOURCE_FLAGS + BRIEF_DOCS "Source file compile flags for compute++" + FULL_DOCS "Set additional compile flags for compiling the SYCL integration + header for the given source file." +) + +#################### +# __build_ir +#################### +# +# Adds a custom target for running compute++ and adding a dependency for the +# resulting integration header and kernel binary. +# +# TARGET : Name of the target. +# SOURCE : Source file to be compiled. +# COUNTER : Counter included in name of custom target. Different counter +# values prevent duplicated names of custom target when source files with +# the same name, but located in different directories, are used for the +# same target. +# +function(__build_ir) + set(options) + set(one_value_args + TARGET + SOURCE + COUNTER + ) + set(multi_value_args) + cmake_parse_arguments(SDK_BUILD_IR + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN} + ) + get_filename_component(sourceFileName ${SDK_BUILD_IR_SOURCE} NAME) + + # Set the path to the integration header. + # The .sycl filename must depend on the target so that different targets + # using the same source file will be generated with a different rule. 
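+  # For example (hypothetical names), a target "myapp" compiling "kernel.cpp"
+  # gets ${CMAKE_CURRENT_BINARY_DIR}/myapp_kernel.cpp.sycl as its integration
+  # header, so two targets sharing kernel.cpp do not collide.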
+ set(baseSyclName ${CMAKE_CURRENT_BINARY_DIR}/${SDK_BUILD_IR_TARGET}_${sourceFileName}) + set(outputSyclFile ${baseSyclName}.sycl) + set(outputDeviceFile ${baseSyclName}.${IR_MAP_${COMPUTECPP_BITCODE}}) + set(depFileName ${baseSyclName}.sycl.d) + + set(include_directories "$") + set(compile_definitions "$") + set(generated_include_directories + $<$:-I\"$\">) + set(generated_compile_definitions + $<$:-D$>) + + # Obtain language standard of the file + set(device_compiler_cxx_standard) + get_target_property(targetCxxStandard ${SDK_BUILD_IR_TARGET} CXX_STANDARD) + if (targetCxxStandard MATCHES 17) + set(device_compiler_cxx_standard "-std=c++1z") + elseif (targetCxxStandard MATCHES 14) + set(device_compiler_cxx_standard "-std=c++14") + elseif (targetCxxStandard MATCHES 11) + set(device_compiler_cxx_standard "-std=c++11") + elseif (targetCxxStandard MATCHES 98) + message(FATAL_ERROR "SYCL applications cannot be compiled using C++98") + else () + set(device_compiler_cxx_standard "") + endif() + + get_property(source_compile_flags + SOURCE ${SDK_BUILD_IR_SOURCE} + PROPERTY COMPUTECPP_SOURCE_FLAGS + ) + separate_arguments(source_compile_flags) + if(source_compile_flags) + list(APPEND computecpp_source_flags ${source_compile_flags}) + endif() + + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS + ${device_compiler_cxx_standard} + ${COMPUTECPP_USER_FLAGS} + ${computecpp_source_flags} + ) + + set(ir_dependencies ${SDK_BUILD_IR_SOURCE}) + get_target_property(target_libraries ${SDK_BUILD_IR_TARGET} LINK_LIBRARIES) + if(target_libraries) + foreach(library ${target_libraries}) + if(TARGET ${library}) + list(APPEND ir_dependencies ${library}) + endif() + endforeach() + endif() + + # Depfile support was only added in CMake 3.7 + # CMake throws an error if it is unsupported by the generator (i. e. not ninja) + if((NOT CMAKE_VERSION VERSION_LESS 3.7.0) AND + CMAKE_GENERATOR MATCHES "Ninja") + file(RELATIVE_PATH relOutputFile ${CMAKE_BINARY_DIR} ${outputDeviceFile}) + set(generate_depfile -MMD -MF ${depFileName} -MT ${relOutputFile}) + set(enable_depfile DEPFILE ${depFileName}) + endif() + + # Add custom command for running compute++ + add_custom_command( + OUTPUT ${outputDeviceFile} ${outputSyclFile} + COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + ${generated_include_directories} + ${generated_compile_definitions} + -sycl-ih ${outputSyclFile} + -o ${outputDeviceFile} + -c ${SDK_BUILD_IR_SOURCE} + ${generate_depfile} + DEPENDS ${ir_dependencies} + IMPLICIT_DEPENDS CXX ${SDK_BUILD_IR_SOURCE} + ${enable_depfile} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") + + # Name: (user-defined name)_(source file)_(counter)_ih + set(headerTargetName + ${SDK_BUILD_IR_TARGET}_${sourceFileName}_${SDK_BUILD_IR_COUNTER}_ih) + + if(NOT MSVC) + # Add a custom target for the generated integration header + add_custom_target(${headerTargetName} DEPENDS ${outputDeviceFile} ${outputSyclFile}) + add_dependencies(${SDK_BUILD_IR_TARGET} ${headerTargetName}) + endif() + + # This property can be set on a per-target basis to indicate that the + # integration header should appear after the main source listing + get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) + + if(includeAfter) + # Change the source file to the integration header - e.g. 
+ # g++ -c source_file_name.cpp.sycl + get_target_property(current_sources ${SDK_BUILD_IR_TARGET} SOURCES) + # Remove absolute path to source file + list(REMOVE_ITEM current_sources ${SDK_BUILD_IR_SOURCE}) + # Remove relative path to source file + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" + rel_source_file ${SDK_BUILD_IR_SOURCE} + ) + list(REMOVE_ITEM current_sources ${rel_source_file}) + # Add SYCL header to source list + list(APPEND current_sources ${outputSyclFile}) + set_property(TARGET ${SDK_BUILD_IR_TARGET} + PROPERTY SOURCES ${current_sources}) + # CMake/gcc don't know what language a .sycl file is, so tell them + set_property(SOURCE ${outputSyclFile} PROPERTY LANGUAGE CXX) + set(includedFile ${SDK_BUILD_IR_SOURCE}) + set(cppFile ${outputSyclFile}) + else() + set_property(SOURCE ${outputSyclFile} PROPERTY HEADER_FILE_ONLY ON) + set(includedFile ${outputSyclFile}) + set(cppFile ${SDK_BUILD_IR_SOURCE}) + endif() + + # Force inclusion of the integration header for the host compiler + if(MSVC) + # Group SYCL files inside Visual Studio + source_group("SYCL" FILES ${outputSyclFile}) + + if(includeAfter) + # Allow the source file to be edited using Visual Studio. + # It will be added as a header file so it won't be compiled. + set_property(SOURCE ${SDK_BUILD_IR_SOURCE} PROPERTY HEADER_FILE_ONLY true) + endif() + + # Add both source and the sycl files to the VS solution. + target_sources(${SDK_BUILD_IR_TARGET} PUBLIC ${SDK_BUILD_IR_SOURCE} ${outputSyclFile}) + + set(forceIncludeFlags "/FI${includedFile} /TP") + else() + set(forceIncludeFlags "-include ${includedFile} -x c++") + endif() + + set_property( + SOURCE ${cppFile} + APPEND_STRING PROPERTY COMPILE_FLAGS "${forceIncludeFlags}" + ) + +endfunction(__build_ir) + +####################### +# add_sycl_to_target +####################### +# +# Adds a SYCL compilation custom command associated with an existing +# target and sets a dependancy on that new command. +# +# TARGET : Name of the target to add SYCL to. +# SOURCES : Source files to be compiled for SYCL. +# +function(add_sycl_to_target) + set(options) + set(one_value_args + TARGET + ) + set(multi_value_args + SOURCES + ) + cmake_parse_arguments(SDK_ADD_SYCL + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN} + ) + + set_target_properties(${SDK_ADD_SYCL_TARGET} PROPERTIES LINKER_LANGUAGE CXX) + + # If the CXX compiler is set to compute++ enable the driver. 
+ get_filename_component(cmakeCxxCompilerFileName "${CMAKE_CXX_COMPILER}" NAME) + if("${cmakeCxxCompilerFileName}" STREQUAL "compute++") + if(MSVC) + message(FATAL_ERROR "The compiler driver is not supported by this system, + revert the CXX compiler to your default host compiler.") + endif() + + get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) + if(includeAfter) + list(APPEND COMPUTECPP_USER_FLAGS -fsycl-ih-last) + endif() + list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl-driver) + # Prepend COMPUTECPP_DEVICE_COMPILER_FLAGS and append COMPUTECPP_USER_FLAGS + foreach(prop COMPILE_OPTIONS INTERFACE_COMPILE_OPTIONS) + get_target_property(target_compile_options ${SDK_ADD_SYCL_TARGET} ${prop}) + if(NOT target_compile_options) + set(target_compile_options "") + endif() + set_property( + TARGET ${SDK_ADD_SYCL_TARGET} + PROPERTY ${prop} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + ${target_compile_options} + ${COMPUTECPP_USER_FLAGS} + ) + endforeach() + else() + set(fileCounter 0) + list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl) + # Add custom target to run compute++ and generate the integration header + foreach(sourceFile ${SDK_ADD_SYCL_SOURCES}) + if(NOT IS_ABSOLUTE ${sourceFile}) + set(sourceFile "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}") + endif() + __build_ir( + TARGET ${SDK_ADD_SYCL_TARGET} + SOURCE ${sourceFile} + COUNTER ${fileCounter} + ) + MATH(EXPR fileCounter "${fileCounter} + 1") + endforeach() + endif() + + set_property(TARGET ${SDK_ADD_SYCL_TARGET} + APPEND PROPERTY LINK_LIBRARIES ComputeCpp::ComputeCpp) + set_property(TARGET ${SDK_ADD_SYCL_TARGET} + APPEND PROPERTY INTERFACE_LINK_LIBRARIES ComputeCpp::ComputeCpp) +endfunction(add_sycl_to_target) diff --git a/external/eigen-3.4.0/cmake/FindEigen2.cmake b/external/eigen-3.4.0/cmake/FindEigen2.cmake new file mode 100644 index 0000000..eb2709d --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindEigen2.cmake @@ -0,0 +1,80 @@ +# - Try to find Eigen2 lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(Eigen2 2.0.3) +# to require version 2.0.3 to newer of Eigen2. +# +# Once done this will define +# +# EIGEN2_FOUND - system has eigen lib with correct version +# EIGEN2_INCLUDE_DIR - the eigen include directory +# EIGEN2_VERSION - eigen version + +# Copyright (c) 2006, 2007 Montel Laurent, +# Copyright (c) 2008, 2009 Gael Guennebaud, +# Redistribution and use is allowed according to the terms of the BSD license. 
+ +if(NOT Eigen2_FIND_VERSION) + if(NOT Eigen2_FIND_VERSION_MAJOR) + set(Eigen2_FIND_VERSION_MAJOR 2) + endif() + if(NOT Eigen2_FIND_VERSION_MINOR) + set(Eigen2_FIND_VERSION_MINOR 0) + endif() + if(NOT Eigen2_FIND_VERSION_PATCH) + set(Eigen2_FIND_VERSION_PATCH 0) + endif() + + set(Eigen2_FIND_VERSION "${Eigen2_FIND_VERSION_MAJOR}.${Eigen2_FIND_VERSION_MINOR}.${Eigen2_FIND_VERSION_PATCH}") +endif() + +macro(_eigen2_check_version) + file(READ "${EIGEN2_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen2_version_header) + + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen2_world_version_match "${_eigen2_version_header}") + set(EIGEN2_WORLD_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen2_major_version_match "${_eigen2_version_header}") + set(EIGEN2_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen2_minor_version_match "${_eigen2_version_header}") + set(EIGEN2_MINOR_VERSION "${CMAKE_MATCH_1}") + + set(EIGEN2_VERSION ${EIGEN2_WORLD_VERSION}.${EIGEN2_MAJOR_VERSION}.${EIGEN2_MINOR_VERSION}) + if((${EIGEN2_WORLD_VERSION} NOTEQUAL 2) OR (${EIGEN2_MAJOR_VERSION} GREATER 10) OR (${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION})) + set(EIGEN2_VERSION_OK FALSE) + else() + set(EIGEN2_VERSION_OK TRUE) + endif() + + if(NOT EIGEN2_VERSION_OK) + + message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, " + "but at least version ${Eigen2_FIND_VERSION} is required") + endif() +endmacro() + +if (EIGEN2_INCLUDE_DIR) + + # in cache already + _eigen2_check_version() + set(EIGEN2_FOUND ${EIGEN2_VERSION_OK}) + +else () + +find_path(EIGEN2_INCLUDE_DIR NAMES Eigen/Core + PATHS + ${INCLUDE_INSTALL_DIR} + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen2 + ) + +if(EIGEN2_INCLUDE_DIR) + _eigen2_check_version() +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Eigen2 DEFAULT_MSG EIGEN2_INCLUDE_DIR EIGEN2_VERSION_OK) + +mark_as_advanced(EIGEN2_INCLUDE_DIR) + +endif() + diff --git a/external/eigen-3.4.0/cmake/FindEigen3.cmake b/external/eigen-3.4.0/cmake/FindEigen3.cmake new file mode 100644 index 0000000..0b36805 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindEigen3.cmake @@ -0,0 +1,107 @@ +# - Try to find Eigen3 lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(Eigen3 3.1.2) +# to require version 3.1.2 or newer of Eigen3. +# +# Once done this will define +# +# EIGEN3_FOUND - system has eigen lib with correct version +# EIGEN3_INCLUDE_DIR - the eigen include directory +# EIGEN3_VERSION - eigen version +# +# and the following imported target: +# +# Eigen3::Eigen - The header-only Eigen library +# +# This module reads hints about search locations from +# the following environment variables: +# +# EIGEN3_ROOT +# EIGEN3_ROOT_DIR + +# Copyright (c) 2006, 2007 Montel Laurent, +# Copyright (c) 2008, 2009 Gael Guennebaud, +# Copyright (c) 2009 Benoit Jacob +# Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
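+#
+# A minimal consumption sketch (illustrative only; "modcma" is a placeholder
+# target name, and this directory is assumed to be on CMAKE_MODULE_PATH):
+#
+#   list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/external/eigen-3.4.0/cmake")
+#   find_package(Eigen3 REQUIRED)
+#   target_link_libraries(modcma PRIVATE Eigen3::Eigen)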
+ +if(NOT Eigen3_FIND_VERSION) + if(NOT Eigen3_FIND_VERSION_MAJOR) + set(Eigen3_FIND_VERSION_MAJOR 2) + endif() + if(NOT Eigen3_FIND_VERSION_MINOR) + set(Eigen3_FIND_VERSION_MINOR 91) + endif() + if(NOT Eigen3_FIND_VERSION_PATCH) + set(Eigen3_FIND_VERSION_PATCH 0) + endif() + + set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") +endif() + +macro(_eigen3_check_version) + file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) + + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") + set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") + set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") + set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") + + set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) + if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK FALSE) + else() + set(EIGEN3_VERSION_OK TRUE) + endif() + + if(NOT EIGEN3_VERSION_OK) + + message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " + "but at least version ${Eigen3_FIND_VERSION} is required") + endif() +endmacro() + +if (EIGEN3_INCLUDE_DIR) + + # in cache already + _eigen3_check_version() + set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) + set(Eigen3_FOUND ${EIGEN3_VERSION_OK}) + +else () + + # search first if an Eigen3Config.cmake is available in the system, + # if successful this would set EIGEN3_INCLUDE_DIR and the rest of + # the script will work as usual + find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) + + if(NOT EIGEN3_INCLUDE_DIR) + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + HINTS + ENV EIGEN3_ROOT + ENV EIGEN3_ROOT_DIR + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + endif() + + if(EIGEN3_INCLUDE_DIR) + _eigen3_check_version() + endif() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) + + mark_as_advanced(EIGEN3_INCLUDE_DIR) + +endif() + +if(EIGEN3_FOUND AND NOT TARGET Eigen3::Eigen) + add_library(Eigen3::Eigen INTERFACE IMPORTED) + set_target_properties(Eigen3::Eigen PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${EIGEN3_INCLUDE_DIR}") +endif() diff --git a/external/eigen-3.4.0/cmake/FindFFTW.cmake b/external/eigen-3.4.0/cmake/FindFFTW.cmake new file mode 100644 index 0000000..ed55c5f --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindFFTW.cmake @@ -0,0 +1,120 @@ +# - Find the FFTW library +# +# Usage: +# find_package(FFTW [REQUIRED] [QUIET] ) +# +# It sets the following variables: +# FFTW_FOUND ... true if fftw is found on the system +# FFTW_LIBRARIES ... full path to fftw library +# FFTW_INCLUDES ... fftw include directory +# +# The following variables will be checked by the function +# FFTW_USE_STATIC_LIBS ... if true, only static libraries are found +# FFTW_ROOT ... if set, the libraries are exclusively searched +# under this path +# FFTW_LIBRARY ... fftw library to use +# FFTW_INCLUDE_DIR ... 
fftw include directory +# + +#If environment variable FFTWDIR is specified, it has same effect as FFTW_ROOT +if( NOT FFTW_ROOT AND ENV{FFTWDIR} ) + set( FFTW_ROOT $ENV{FFTWDIR} ) +endif() + +# Check if we can use PkgConfig +include(CMakeFindDependencyMacro) +find_dependency(PkgConfig) + +#Determine from PKG +if( PKG_CONFIG_FOUND AND NOT FFTW_ROOT ) + pkg_check_modules( PKG_FFTW QUIET "fftw3" ) +endif() + +#Check whether to search static or dynamic libs +set( CMAKE_FIND_LIBRARY_SUFFIXES_SAV ${CMAKE_FIND_LIBRARY_SUFFIXES} ) + +if( ${FFTW_USE_STATIC_LIBS} ) + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ) +else() + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX} ) +endif() + +if( FFTW_ROOT ) + + #find libs + find_library( + FFTW_LIB + NAMES "fftw3" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTWF_LIB + NAMES "fftw3f" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTWL_LIB + NAMES "fftw3l" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + #find includes + find_path( + FFTW_INCLUDES + NAMES "fftw3.h" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "include" + NO_DEFAULT_PATH + ) + +else() + + find_library( + FFTW_LIB + NAMES "fftw3" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTWF_LIB + NAMES "fftw3f" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + + find_library( + FFTWL_LIB + NAMES "fftw3l" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_path( + FFTW_INCLUDES + NAMES "fftw3.h" + PATHS ${PKG_FFTW_INCLUDE_DIRS} ${INCLUDE_INSTALL_DIR} + ) + +endif() + +set(FFTW_LIBRARIES ${FFTW_LIB} ${FFTWF_LIB}) + +if(FFTWL_LIB) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTWL_LIB}) +endif() + +set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAV} ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + FFTW_INCLUDES FFTW_LIBRARIES) + +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES FFTW_LIB FFTWF_LIB FFTWL_LIB) + diff --git a/external/eigen-3.4.0/cmake/FindGLEW.cmake b/external/eigen-3.4.0/cmake/FindGLEW.cmake new file mode 100644 index 0000000..9d486d5 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindGLEW.cmake @@ -0,0 +1,105 @@ +# Copyright (c) 2009 Boudewijn Rempt +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. +# +# - try to find glew library and include files +# GLEW_INCLUDE_DIR, where to find GL/glew.h, etc. +# GLEW_LIBRARIES, the libraries to link against +# GLEW_FOUND, If false, do not try to use GLEW. +# Also defined, but not for general use are: +# GLEW_GLEW_LIBRARY = the full path to the glew library. + +if (WIN32) + + if(CYGWIN) + + find_path( GLEW_INCLUDE_DIR GL/glew.h) + + find_library( GLEW_GLEW_LIBRARY glew32 + ${OPENGL_LIBRARY_DIR} + /usr/lib/w32api + /usr/X11R6/lib + ) + + + else(CYGWIN) + + find_path( GLEW_INCLUDE_DIR GL/glew.h + $ENV{GLEW_ROOT_PATH}/include + ) + + find_library( GLEW_GLEW_LIBRARY + NAMES glew glew32 + PATHS + $ENV{GLEW_ROOT_PATH}/lib + ${OPENGL_LIBRARY_DIR} + ) + + endif(CYGWIN) + +else (WIN32) + + if (APPLE) +# These values for Apple could probably do with improvement. 
+ find_path( GLEW_INCLUDE_DIR glew.h + /System/Library/Frameworks/GLEW.framework/Versions/A/Headers + ${OPENGL_LIBRARY_DIR} + ) + set(GLEW_GLEW_LIBRARY "-framework GLEW" CACHE STRING "GLEW library for OSX") + set(GLEW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") + else (APPLE) + + find_path( GLEW_INCLUDE_DIR GL/glew.h + /usr/include/GL + /usr/openwin/share/include + /usr/openwin/include + /usr/X11R6/include + /usr/include/X11 + /opt/graphics/OpenGL/include + /opt/graphics/OpenGL/contrib/libglew + ) + + find_library( GLEW_GLEW_LIBRARY GLEW + /usr/openwin/lib + /usr/X11R6/lib + ) + + endif (APPLE) + +endif (WIN32) + +set( GLEW_FOUND "NO" ) +if(GLEW_INCLUDE_DIR) + if(GLEW_GLEW_LIBRARY) + # Is -lXi and -lXmu required on all platforms that have it? + # If not, we need some way to figure out what platform we are on. + set( GLEW_LIBRARIES + ${GLEW_GLEW_LIBRARY} + ${GLEW_cocoa_LIBRARY} + ) + set( GLEW_FOUND "YES" ) + +#The following deprecated settings are for backwards compatibility with CMake1.4 + set (GLEW_LIBRARY ${GLEW_LIBRARIES}) + set (GLEW_INCLUDE_PATH ${GLEW_INCLUDE_DIR}) + + endif(GLEW_GLEW_LIBRARY) +endif(GLEW_INCLUDE_DIR) + +if(GLEW_FOUND) + if(NOT GLEW_FIND_QUIETLY) + message(STATUS "Found Glew: ${GLEW_LIBRARIES}") + endif(NOT GLEW_FIND_QUIETLY) +else(GLEW_FOUND) + if(GLEW_FIND_REQUIRED) + message(FATAL_ERROR "Could not find Glew") + endif(GLEW_FIND_REQUIRED) +endif(GLEW_FOUND) + +mark_as_advanced( + GLEW_INCLUDE_DIR + GLEW_GLEW_LIBRARY + GLEW_Xmu_LIBRARY + GLEW_Xi_LIBRARY +) diff --git a/external/eigen-3.4.0/cmake/FindGMP.cmake b/external/eigen-3.4.0/cmake/FindGMP.cmake new file mode 100644 index 0000000..c41eedc --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindGMP.cmake @@ -0,0 +1,21 @@ +# Try to find the GNU Multiple Precision Arithmetic Library (GMP) +# See http://gmplib.org/ + +if (GMP_INCLUDES AND GMP_LIBRARIES) + set(GMP_FIND_QUIETLY TRUE) +endif () + +find_path(GMP_INCLUDES + NAMES + gmp.h + PATHS + $ENV{GMPDIR} + ${INCLUDE_INSTALL_DIR} +) + +find_library(GMP_LIBRARIES gmp PATHS $ENV{GMPDIR} ${LIB_INSTALL_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GMP DEFAULT_MSG + GMP_INCLUDES GMP_LIBRARIES) +mark_as_advanced(GMP_INCLUDES GMP_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindGSL.cmake b/external/eigen-3.4.0/cmake/FindGSL.cmake new file mode 100644 index 0000000..8632232 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindGSL.cmake @@ -0,0 +1,170 @@ +# Try to find gnu scientific library GSL +# See +# http://www.gnu.org/software/gsl/ and +# http://gnuwin32.sourceforge.net/packages/gsl.htm +# +# Once run this will define: +# +# GSL_FOUND = system has GSL lib +# +# GSL_LIBRARIES = full path to the libraries +# on Unix/Linux with additional linker flags from "gsl-config --libs" +# +# CMAKE_GSL_CXX_FLAGS = Unix compiler flags for GSL, essentially "`gsl-config --cxxflags`" +# +# GSL_INCLUDE_DIR = where to find headers +# +# GSL_LINK_DIRECTORIES = link directories, useful for rpath on Unix +# GSL_EXE_LINKER_FLAGS = rpath on Unix +# +# Felix Woelk 07/2004 +# Jan Woetzel +# +# www.mip.informatik.uni-kiel.de +# -------------------------------- + +if(WIN32) + # JW tested with gsl-1.8, Windows XP, MSVS 7.1 + set(GSL_POSSIBLE_ROOT_DIRS + ${GSL_ROOT_DIR} + $ENV{GSL_ROOT_DIR} + ${GSL_DIR} + ${GSL_HOME} + $ENV{GSL_DIR} + $ENV{GSL_HOME} + $ENV{EXTRA} + "C:/Program Files/GnuWin32" + ) + find_path(GSL_INCLUDE_DIR + NAMES gsl/gsl_cdf.h gsl/gsl_randist.h + PATHS ${GSL_POSSIBLE_ROOT_DIRS} + PATH_SUFFIXES include + DOC "GSL 
header include dir" + ) + + find_library(GSL_GSL_LIBRARY + NAMES libgsl.dll.a gsl libgsl + PATHS ${GSL_POSSIBLE_ROOT_DIRS} + PATH_SUFFIXES lib + DOC "GSL library" ) + + if(NOT GSL_GSL_LIBRARY) + find_file(GSL_GSL_LIBRARY + NAMES libgsl.dll.a + PATHS ${GSL_POSSIBLE_ROOT_DIRS} + PATH_SUFFIXES lib + DOC "GSL library") + endif() + + find_library(GSL_GSLCBLAS_LIBRARY + NAMES libgslcblas.dll.a gslcblas libgslcblas + PATHS ${GSL_POSSIBLE_ROOT_DIRS} + PATH_SUFFIXES lib + DOC "GSL cblas library dir" ) + + if(NOT GSL_GSLCBLAS_LIBRARY) + find_file(GSL_GSLCBLAS_LIBRARY + NAMES libgslcblas.dll.a + PATHS ${GSL_POSSIBLE_ROOT_DIRS} + PATH_SUFFIXES lib + DOC "GSL library") + endif() + + set(GSL_LIBRARIES ${GSL_GSL_LIBRARY}) + + #message("DBG\n" + # "GSL_GSL_LIBRARY=${GSL_GSL_LIBRARY}\n" + # "GSL_GSLCBLAS_LIBRARY=${GSL_GSLCBLAS_LIBRARY}\n" + # "GSL_LIBRARIES=${GSL_LIBRARIES}") + + +else(WIN32) + + if(UNIX) + set(GSL_CONFIG_PREFER_PATH + "$ENV{GSL_DIR}/bin" + "$ENV{GSL_DIR}" + "$ENV{GSL_HOME}/bin" + "$ENV{GSL_HOME}" + CACHE STRING "preferred path to GSL (gsl-config)") + find_program(GSL_CONFIG gsl-config + ${GSL_CONFIG_PREFER_PATH} + /usr/bin/ + ) + # message("DBG GSL_CONFIG ${GSL_CONFIG}") + + if (GSL_CONFIG) + # set CXXFLAGS to be fed into CXX_FLAGS by the user: + set(GSL_CXX_FLAGS "`${GSL_CONFIG} --cflags`") + + # set INCLUDE_DIRS to prefix+include + exec_program(${GSL_CONFIG} + ARGS --prefix + OUTPUT_VARIABLE GSL_PREFIX) + set(GSL_INCLUDE_DIR ${GSL_PREFIX}/include CACHE STRING INTERNAL) + + # set link libraries and link flags + #set(GSL_LIBRARIES "`${GSL_CONFIG} --libs`") + exec_program(${GSL_CONFIG} + ARGS --libs + OUTPUT_VARIABLE GSL_LIBRARIES ) + + # extract link dirs for rpath + exec_program(${GSL_CONFIG} + ARGS --libs + OUTPUT_VARIABLE GSL_CONFIG_LIBS ) + + # extract version + exec_program(${GSL_CONFIG} + ARGS --version + OUTPUT_VARIABLE GSL_FULL_VERSION ) + + # split version as major/minor + string(REGEX MATCH "(.)\\..*" GSL_VERSION_MAJOR_ "${GSL_FULL_VERSION}") + set(GSL_VERSION_MAJOR ${CMAKE_MATCH_1}) + string(REGEX MATCH ".\\.(.*)" GSL_VERSION_MINOR_ "${GSL_FULL_VERSION}") + set(GSL_VERSION_MINOR ${CMAKE_MATCH_1}) + + # split off the link dirs (for rpath) + # use regular expression to match wildcard equivalent "-L*" + # with is a space or a semicolon + string(REGEX MATCHALL "[-][L]([^ ;])+" + GSL_LINK_DIRECTORIES_WITH_PREFIX + "${GSL_CONFIG_LIBS}" ) + # message("DBG GSL_LINK_DIRECTORIES_WITH_PREFIX=${GSL_LINK_DIRECTORIES_WITH_PREFIX}") + + # remove prefix -L because we need the pure directory for LINK_DIRECTORIES + + if (GSL_LINK_DIRECTORIES_WITH_PREFIX) + string(REGEX REPLACE "[-][L]" "" GSL_LINK_DIRECTORIES ${GSL_LINK_DIRECTORIES_WITH_PREFIX} ) + endif (GSL_LINK_DIRECTORIES_WITH_PREFIX) + set(GSL_EXE_LINKER_FLAGS "-Wl,-rpath,${GSL_LINK_DIRECTORIES}" CACHE STRING INTERNAL) + # message("DBG GSL_LINK_DIRECTORIES=${GSL_LINK_DIRECTORIES}") + # message("DBG GSL_EXE_LINKER_FLAGS=${GSL_EXE_LINKER_FLAGS}") + + # add_definitions("-DHAVE_GSL") + # set(GSL_DEFINITIONS "-DHAVE_GSL") + mark_as_advanced( + GSL_CXX_FLAGS + GSL_INCLUDE_DIR + GSL_LIBRARIES + GSL_LINK_DIRECTORIES + GSL_DEFINITIONS + ) + message(STATUS "Using GSL from ${GSL_PREFIX}") + + else(GSL_CONFIG) + message("FindGSL.cmake: gsl-config not found. Please set it manually. 
GSL_CONFIG=${GSL_CONFIG}") + endif(GSL_CONFIG) + + endif(UNIX) +endif(WIN32) + + +if(GSL_LIBRARIES) + if(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) + + set(GSL_FOUND 1) + + endif(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) +endif(GSL_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindGoogleHash.cmake b/external/eigen-3.4.0/cmake/FindGoogleHash.cmake new file mode 100644 index 0000000..481eb4d --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindGoogleHash.cmake @@ -0,0 +1,23 @@ + +if (GOOGLEHASH_INCLUDES AND GOOGLEHASH_LIBRARIES) + set(GOOGLEHASH_FIND_QUIETLY TRUE) +endif () + +find_path(GOOGLEHASH_INCLUDES + NAMES + google/dense_hash_map + PATHS + ${INCLUDE_INSTALL_DIR} +) + +if(GOOGLEHASH_INCLUDES) + # let's make sure it compiles with the current compiler + file(WRITE ${CMAKE_BINARY_DIR}/googlehash_test.cpp + "#include \n#include \nint main(int argc, char** argv) { google::dense_hash_map a; google::sparse_hash_map b; return 0;}\n") + try_compile(GOOGLEHASH_COMPILE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/googlehash_test.cpp OUTPUT_VARIABLE GOOGLEHASH_COMPILE_RESULT) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GoogleHash DEFAULT_MSG GOOGLEHASH_INCLUDES GOOGLEHASH_COMPILE) + +mark_as_advanced(GOOGLEHASH_INCLUDES) diff --git a/external/eigen-3.4.0/cmake/FindHWLOC.cmake b/external/eigen-3.4.0/cmake/FindHWLOC.cmake new file mode 100644 index 0000000..522f521 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindHWLOC.cmake @@ -0,0 +1,332 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find HWLOC include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(HWLOC +# [REQUIRED]) # Fail with error if hwloc is not found +# +# This module finds headers and hwloc library. +# Results are reported in variables: +# HWLOC_FOUND - True if headers and requested libraries were found +# HWLOC_INCLUDE_DIRS - hwloc include directories +# HWLOC_LIBRARY_DIRS - Link directories for hwloc libraries +# HWLOC_LIBRARIES - hwloc component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DHWLOC_DIR=path/to/hwloc): +# HWLOC_DIR - Where to find the base directory of hwloc +# HWLOC_INCDIR - Where to find the header files +# HWLOC_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: HWLOC_DIR, HWLOC_INCDIR, HWLOC_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) 
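+#
+# Illustrative usage sketch ("my_app" is a placeholder target name; the
+# HWLOC_DIR hint is optional, see the variables documented above):
+#
+#   set(HWLOC_DIR "/opt/hwloc")   # optional install-prefix hint
+#   find_package(HWLOC REQUIRED)
+#   target_include_directories(my_app PRIVATE ${HWLOC_INCLUDE_DIRS})
+#   target_link_libraries(my_app PRIVATE ${HWLOC_LIBRARIES})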
+ +include(CheckStructHasMember) +include(CheckCSourceCompiles) + +if (NOT HWLOC_FOUND) + set(HWLOC_DIR "" CACHE PATH "Installation directory of HWLOC library") + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "A cache variable, namely HWLOC_DIR, has been set to specify the install directory of HWLOC") + endif() +endif() + +set(ENV_HWLOC_DIR "$ENV{HWLOC_DIR}") +set(ENV_HWLOC_INCDIR "$ENV{HWLOC_INCDIR}") +set(ENV_HWLOC_LIBDIR "$ENV{HWLOC_LIBDIR}") +set(HWLOC_GIVEN_BY_USER "FALSE") +if ( HWLOC_DIR OR ( HWLOC_INCDIR AND HWLOC_LIBDIR) OR ENV_HWLOC_DIR OR (ENV_HWLOC_INCDIR AND ENV_HWLOC_LIBDIR) ) + set(HWLOC_GIVEN_BY_USER "TRUE") +endif() + +# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) +# ------------------------------------------------------------------------------------- +include(CMakeFindDependencyMacro) +# include(FindPkgConfig) +find_dependency(PkgConfig QUIET) +if( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) + + pkg_search_module(HWLOC hwloc) + if (NOT HWLOC_FIND_QUIETLY) + if (HWLOC_FOUND AND HWLOC_LIBRARIES) + message(STATUS "Looking for HWLOC - found using PkgConfig") + #if(NOT HWLOC_INCLUDE_DIRS) + # message("${Magenta}HWLOC_INCLUDE_DIRS is empty using PkgConfig." + # "Perhaps the path to hwloc headers is already present in your" + # "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}") + #endif() + else() + message(STATUS "${Magenta}Looking for HWLOC - not found using PkgConfig." + "\n Perhaps you should add the directory containing hwloc.pc to" + "\n the PKG_CONFIG_PATH environment variable.${ColourReset}") + endif() + endif() + +endif() + +if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) + + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for HWLOC - PkgConfig not used") + endif() + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + if(ENV_HWLOC_INCDIR) + list(APPEND _inc_env "${ENV_HWLOC_INCDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _inc_env "${ENV_HWLOC_DIR}") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include/hwloc") + else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + + # Try to find the hwloc header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(HWLOC_INCDIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_INCDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_DIR} + PATH_SUFFIXES "include" "include/hwloc") + else() + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + 
NAMES hwloc.h + HINTS ${PATH_TO_LOOK_FOR} + PATH_SUFFIXES "hwloc") + endif() + endif() + mark_as_advanced(HWLOC_hwloc.h_DIRS) + + # Add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc.h_DIRS) + set(HWLOC_INCLUDE_DIRS "${HWLOC_hwloc.h_DIRS}") + else () + set(HWLOC_INCLUDE_DIRS "HWLOC_INCLUDE_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- hwloc.h not found") + endif() + endif () + + if (HWLOC_INCLUDE_DIRS) + list(REMOVE_DUPLICATES HWLOC_INCLUDE_DIRS) + endif () + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + if(ENV_HWLOC_LIBDIR) + list(APPEND _lib_env "${ENV_HWLOC_LIBDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _lib_env "${ENV_HWLOC_DIR}") + list(APPEND _lib_env "${ENV_HWLOC_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_lib_env}") + + # Try to find the hwloc lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(HWLOC_LIBDIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_LIBDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(HWLOC_hwloc_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc_LIBRARY) + get_filename_component(hwloc_lib_path ${HWLOC_hwloc_LIBRARY} PATH) + # set cmake variables (respects naming convention) + set(HWLOC_LIBRARIES "${HWLOC_hwloc_LIBRARY}") + set(HWLOC_LIBRARY_DIRS "${hwloc_lib_path}") + else () + set(HWLOC_LIBRARIES "HWLOC_LIBRARIES-NOTFOUND") + set(HWLOC_LIBRARY_DIRS "HWLOC_LIBRARY_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- lib hwloc not found") + endif() + endif () + + if (HWLOC_LIBRARY_DIRS) + list(REMOVE_DUPLICATES HWLOC_LIBRARY_DIRS) + endif () + + # check a function to validate the find + if(HWLOC_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # HWLOC + if (HWLOC_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + if (HWLOC_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${HWLOC_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${HWLOC_LIBRARIES}") + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(HWLOC_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(hwloc_topology_init HWLOC_WORKS) + 
mark_as_advanced(HWLOC_WORKS) + + if(NOT HWLOC_WORKS) + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc : test of hwloc_topology_init with hwloc library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif() + +endif() + +if (HWLOC_LIBRARIES) + if (HWLOC_LIBRARY_DIRS) + list(GET HWLOC_LIBRARY_DIRS 0 first_lib_path) + else() + list(GET HWLOC_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + endif() + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(HWLOC_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HWLOC library" FORCE) + else() + set(HWLOC_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HWLOC library" FORCE) + endif() +endif() +mark_as_advanced(HWLOC_DIR) +mark_as_advanced(HWLOC_DIR_FOUND) + +# check that HWLOC has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +if (PKG_CONFIG_EXECUTABLE AND HWLOC_FOUND) + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES) +else() + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES + HWLOC_WORKS) +endif() + +if (HWLOC_FOUND) + set(HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + list(APPEND CMAKE_REQUIRED_INCLUDES ${HWLOC_INCLUDE_DIRS}) + + # test headers to guess the version + check_struct_has_member( "struct hwloc_obj" parent hwloc.h HAVE_HWLOC_PARENT_MEMBER ) + check_struct_has_member( "struct hwloc_cache_attr_s" size hwloc.h HAVE_HWLOC_CACHE_ATTR ) + check_c_source_compiles( "#include + int main(void) { hwloc_obj_t o; o->type = HWLOC_OBJ_PU; return 0;}" HAVE_HWLOC_OBJ_PU) + include(CheckLibraryExists) + check_library_exists(${HWLOC_LIBRARIES} hwloc_bitmap_free "" HAVE_HWLOC_BITMAP) + + set(CMAKE_REQUIRED_INCLUDES ${HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES}) +endif() diff --git a/external/eigen-3.4.0/cmake/FindKLU.cmake b/external/eigen-3.4.0/cmake/FindKLU.cmake new file mode 100644 index 0000000..6217d14 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindKLU.cmake @@ -0,0 +1,48 @@ +# KLU lib usually requires linking to a blas library. +# It is up to the user of this module to find a BLAS and link to it. 
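+# An illustrative sketch of that responsibility ("my_app" is a placeholder
+# target name; the variable names are the ones set by this module):
+#
+#   find_package(BLAS REQUIRED)
+#   find_package(KLU REQUIRED)
+#   target_include_directories(my_app PRIVATE ${KLU_INCLUDES})
+#   target_link_libraries(my_app PRIVATE ${KLU_LIBRARIES} ${BLAS_LIBRARIES})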
+ +if (KLU_INCLUDES AND KLU_LIBRARIES) + set(KLU_FIND_QUIETLY TRUE) +endif () + +find_path(KLU_INCLUDES + NAMES + klu.h + PATHS + $ENV{KLUDIR} + ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES + suitesparse + ufsparse +) + +find_library(KLU_LIBRARIES klu PATHS $ENV{KLUDIR} ${LIB_INSTALL_DIR}) + +if(KLU_LIBRARIES) + + if(NOT KLU_LIBDIR) + get_filename_component(KLU_LIBDIR ${KLU_LIBRARIES} PATH) + endif() + + find_library(COLAMD_LIBRARY colamd PATHS ${KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) + if(COLAMD_LIBRARY) + set(KLU_LIBRARIES ${KLU_LIBRARIES} ${COLAMD_LIBRARY}) + endif () + + find_library(AMD_LIBRARY amd PATHS ${KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) + if(AMD_LIBRARY) + set(KLU_LIBRARIES ${KLU_LIBRARIES} ${AMD_LIBRARY}) + endif () + + find_library(BTF_LIBRARY btf PATHS $ENV{KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) + if(BTF_LIBRARY) + set(KLU_LIBRARIES ${KLU_LIBRARIES} ${BTF_LIBRARY}) + endif() + +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(KLU DEFAULT_MSG + KLU_INCLUDES KLU_LIBRARIES) + +mark_as_advanced(KLU_INCLUDES KLU_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY BTF_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/FindLAPACK.cmake b/external/eigen-3.4.0/cmake/FindLAPACK.cmake new file mode 100644 index 0000000..3fd7388 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindLAPACK.cmake @@ -0,0 +1,274 @@ +# Find LAPACK library +# +# This module finds an installed library that implements the LAPACK +# linear-algebra interface (see http://www.netlib.org/lapack/). +# The approach follows mostly that taken for the autoconf macro file, acx_lapack.m4 +# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). +# +# This module sets the following variables: +# LAPACK_FOUND - set to true if a library implementing the LAPACK interface +# is found +# LAPACK_INCLUDE_DIR - Directories containing the LAPACK header files +# LAPACK_DEFINITIONS - Compilation options to use LAPACK +# LAPACK_LINKER_FLAGS - Linker flags to use LAPACK (excluding -l +# and -L). +# LAPACK_LIBRARIES_DIR - Directories containing the LAPACK libraries. +# May be null if LAPACK_LIBRARIES contains libraries name using full path. +# LAPACK_LIBRARIES - List of libraries to link against LAPACK interface. +# May be null if the compiler supports auto-link (e.g. VC++). +# LAPACK_USE_FILE - The name of the cmake module to include to compile +# applications or libraries using LAPACK. +# +# This module was modified by CGAL team: +# - find libraries for a C++ compiler, instead of Fortran +# - added LAPACK_INCLUDE_DIR, LAPACK_DEFINITIONS and LAPACK_LIBRARIES_DIR +# - removed LAPACK95_LIBRARIES + + +include(CheckFunctionExists) +include(CMakeFindDependencyMacro) + +# This macro checks for the existence of the combination of fortran libraries +# given by _list. If the combination is found, this macro checks (using the +# check_function_exists macro) whether can link against that library +# combination using the name of a routine given by _name using the linker +# flags given by _flags. If the combination of libraries is found and passes +# the link test, LIBRARIES is set to the list of complete library paths that +# have been found and DEFINITIONS to the required definitions. +# Otherwise, LIBRARIES is set to FALSE. +# N.B. _prefix is the prefix applied to the names of all cached variables that +# are generated internally and marked advanced by this macro. 
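+#
+# Illustrative invocation (simplified from the calls made later in this file):
+#
+#   check_lapack_libraries(
+#     LAPACK_DEFINITIONS LAPACK_LIBRARIES   # DEFINITIONS and LIBRARIES outputs
+#     LAPACK cheev ""                       # _prefix, _name (routine), _flags
+#     "lapack" "${BLAS_LIBRARIES}"          # _list, _blas
+#     "ENV LAPACK_LIB_DIR")                 # _path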
+macro(check_lapack_libraries DEFINITIONS LIBRARIES _prefix _name _flags _list _blas _path) + #message("DEBUG: check_lapack_libraries(${_list} in ${_path} with ${_blas})") + + # Check for the existence of the libraries given by _list + set(_libraries_found TRUE) + set(_libraries_work FALSE) + set(${DEFINITIONS} "") + set(${LIBRARIES} "") + set(_combined_name) + foreach(_library ${_list}) + set(_combined_name ${_combined_name}_${_library}) + + if(_libraries_found) + # search first in ${_path} + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS ${_path} NO_DEFAULT_PATH + ) + # if not found, search in environment variables and system + if ( WIN32 ) + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS ENV LIB + ) + elseif ( APPLE ) + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH + ) + else () + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH + ) + endif() + mark_as_advanced(${_prefix}_${_library}_LIBRARY) + set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + set(_libraries_found ${${_prefix}_${_library}_LIBRARY}) + endif() + endforeach() + if(_libraries_found) + set(_libraries_found ${${LIBRARIES}}) + endif() + + # Test this combination of libraries with the Fortran/f2c interface. + # We test the Fortran interface first as it is well standardized. + if(_libraries_found AND NOT _libraries_work) + set(${DEFINITIONS} "-D${_prefix}_USE_F2C") + set(${LIBRARIES} ${_libraries_found}) + # Some C++ linkers require the f2c library to link with Fortran libraries. + # I do not know which ones, thus I just add the f2c library if it is available. + find_dependency( F2C QUIET ) + if ( F2C_FOUND ) + set(${DEFINITIONS} ${${DEFINITIONS}} ${F2C_DEFINITIONS}) + set(${LIBRARIES} ${${LIBRARIES}} ${F2C_LIBRARIES}) + endif() + set(CMAKE_REQUIRED_DEFINITIONS ${${DEFINITIONS}}) + set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) + #message("DEBUG: CMAKE_REQUIRED_DEFINITIONS = ${CMAKE_REQUIRED_DEFINITIONS}") + #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") + # Check if function exists with f2c calling convention (ie a trailing underscore) + check_function_exists(${_name}_ ${_prefix}_${_name}_${_combined_name}_f2c_WORKS) + set(CMAKE_REQUIRED_DEFINITIONS} "") + set(CMAKE_REQUIRED_LIBRARIES "") + mark_as_advanced(${_prefix}_${_name}_${_combined_name}_f2c_WORKS) + set(_libraries_work ${${_prefix}_${_name}_${_combined_name}_f2c_WORKS}) + endif() + + # If not found, test this combination of libraries with a C interface. + # A few implementations (ie ACML) provide a C interface. Unfortunately, there is no standard. 
+ if(_libraries_found AND NOT _libraries_work) + set(${DEFINITIONS} "") + set(${LIBRARIES} ${_libraries_found}) + set(CMAKE_REQUIRED_DEFINITIONS "") + set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) + #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") + check_function_exists(${_name} ${_prefix}_${_name}${_combined_name}_WORKS) + set(CMAKE_REQUIRED_LIBRARIES "") + mark_as_advanced(${_prefix}_${_name}${_combined_name}_WORKS) + set(_libraries_work ${${_prefix}_${_name}${_combined_name}_WORKS}) + endif() + + # on failure + if(NOT _libraries_work) + set(${DEFINITIONS} "") + set(${LIBRARIES} FALSE) + endif() + #message("DEBUG: ${DEFINITIONS} = ${${DEFINITIONS}}") + #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") +endmacro() + + +# +# main +# + +# LAPACK requires BLAS +if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) + find_dependency(BLAS) +else() + find_dependency(BLAS REQUIRED) +endif() + +if (NOT BLAS_FOUND) + + message(STATUS "LAPACK requires BLAS.") + set(LAPACK_FOUND FALSE) + +# Is it already configured? +elseif (LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) + + set(LAPACK_FOUND TRUE) + +else() + + # reset variables + set( LAPACK_INCLUDE_DIR "" ) + set( LAPACK_DEFINITIONS "" ) + set( LAPACK_LINKER_FLAGS "" ) # unused (yet) + set( LAPACK_LIBRARIES "" ) + set( LAPACK_LIBRARIES_DIR "" ) + + # + # If Unix, search for LAPACK function in possible libraries + # + + #intel mkl lapack? + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_DEFINITIONS + LAPACK_LIBRARIES + LAPACK + cheev + "" + "mkl_lapack" + "${BLAS_LIBRARIES}" + "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" + ) + endif() + + #acml lapack? + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_DEFINITIONS + LAPACK_LIBRARIES + LAPACK + cheev + "" + "acml" + "${BLAS_LIBRARIES}" + "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" + ) + endif() + + # Apple LAPACK library? + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_DEFINITIONS + LAPACK_LIBRARIES + LAPACK + cheev + "" + "Accelerate" + "${BLAS_LIBRARIES}" + "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" + ) + endif() + + if ( NOT LAPACK_LIBRARIES ) + check_lapack_libraries( + LAPACK_DEFINITIONS + LAPACK_LIBRARIES + LAPACK + cheev + "" + "vecLib" + "${BLAS_LIBRARIES}" + "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" + ) + endif () + + # Generic LAPACK library? + # This configuration *must* be the last try as this library is notably slow. + if ( NOT LAPACK_LIBRARIES ) + check_lapack_libraries( + LAPACK_DEFINITIONS + LAPACK_LIBRARIES + LAPACK + cheev + "" + "lapack" + "${BLAS_LIBRARIES}" + "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" + ) + endif() + + if(LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) + set(LAPACK_FOUND TRUE) + else() + set(LAPACK_FOUND FALSE) + endif() + + if(NOT LAPACK_FIND_QUIETLY) + if(LAPACK_FOUND) + message(STATUS "A library with LAPACK API found.") + else() + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR "A required library with LAPACK API not found. Please specify library location.") + else() + message(STATUS "A library with LAPACK API not found. 
Please specify library location.") + endif() + endif() + endif() + + # Add variables to cache + set( LAPACK_INCLUDE_DIR "${LAPACK_INCLUDE_DIR}" + CACHE PATH "Directories containing the LAPACK header files" FORCE ) + set( LAPACK_DEFINITIONS "${LAPACK_DEFINITIONS}" + CACHE STRING "Compilation options to use LAPACK" FORCE ) + set( LAPACK_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}" + CACHE STRING "Linker flags to use LAPACK" FORCE ) + set( LAPACK_LIBRARIES "${LAPACK_LIBRARIES}" + CACHE FILEPATH "LAPACK libraries name" FORCE ) + set( LAPACK_LIBRARIES_DIR "${LAPACK_LIBRARIES_DIR}" + CACHE PATH "Directories containing the LAPACK libraries" FORCE ) + + #message("DEBUG: LAPACK_INCLUDE_DIR = ${LAPACK_INCLUDE_DIR}") + #message("DEBUG: LAPACK_DEFINITIONS = ${LAPACK_DEFINITIONS}") + #message("DEBUG: LAPACK_LINKER_FLAGS = ${LAPACK_LINKER_FLAGS}") + #message("DEBUG: LAPACK_LIBRARIES = ${LAPACK_LIBRARIES}") + #message("DEBUG: LAPACK_LIBRARIES_DIR = ${LAPACK_LIBRARIES_DIR}") + #message("DEBUG: LAPACK_FOUND = ${LAPACK_FOUND}") + +endif() diff --git a/external/eigen-3.4.0/cmake/FindMPFR.cmake b/external/eigen-3.4.0/cmake/FindMPFR.cmake new file mode 100644 index 0000000..d8da9d6 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindMPFR.cmake @@ -0,0 +1,83 @@ +# Try to find the MPFR library +# See http://www.mpfr.org/ +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(MPFR 2.3.0) +# to require version 2.3.0 to newer of MPFR. +# +# Once done this will define +# +# MPFR_FOUND - system has MPFR lib with correct version +# MPFR_INCLUDES - the MPFR include directory +# MPFR_LIBRARIES - the MPFR library +# MPFR_VERSION - MPFR version + +# Copyright (c) 2006, 2007 Montel Laurent, +# Copyright (c) 2008, 2009 Gael Guennebaud, +# Copyright (c) 2010 Jitse Niesen, +# Redistribution and use is allowed according to the terms of the BSD license. 
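+#
+# Illustrative usage sketch ("my_app" is a placeholder target name):
+#
+#   find_package(MPFR 2.3.0)
+#   if(MPFR_FOUND)
+#     target_include_directories(my_app PRIVATE ${MPFR_INCLUDES})
+#     target_link_libraries(my_app PRIVATE ${MPFR_LIBRARIES})
+#   endif()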
+ +# Set MPFR_INCLUDES + +find_path(MPFR_INCLUDES + NAMES + mpfr.h + PATHS + $ENV{GMPDIR} + ${INCLUDE_INSTALL_DIR} +) + +# Set MPFR_FIND_VERSION to 1.0.0 if no minimum version is specified + +if(NOT MPFR_FIND_VERSION) + if(NOT MPFR_FIND_VERSION_MAJOR) + set(MPFR_FIND_VERSION_MAJOR 1) + endif() + if(NOT MPFR_FIND_VERSION_MINOR) + set(MPFR_FIND_VERSION_MINOR 0) + endif() + if(NOT MPFR_FIND_VERSION_PATCH) + set(MPFR_FIND_VERSION_PATCH 0) + endif() + + set(MPFR_FIND_VERSION "${MPFR_FIND_VERSION_MAJOR}.${MPFR_FIND_VERSION_MINOR}.${MPFR_FIND_VERSION_PATCH}") +endif() + + +if(MPFR_INCLUDES) + + # Set MPFR_VERSION + + file(READ "${MPFR_INCLUDES}/mpfr.h" _mpfr_version_header) + + string(REGEX MATCH "define[ \t]+MPFR_VERSION_MAJOR[ \t]+([0-9]+)" _mpfr_major_version_match "${_mpfr_version_header}") + set(MPFR_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+MPFR_VERSION_MINOR[ \t]+([0-9]+)" _mpfr_minor_version_match "${_mpfr_version_header}") + set(MPFR_MINOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+MPFR_VERSION_PATCHLEVEL[ \t]+([0-9]+)" _mpfr_patchlevel_version_match "${_mpfr_version_header}") + set(MPFR_PATCHLEVEL_VERSION "${CMAKE_MATCH_1}") + + set(MPFR_VERSION ${MPFR_MAJOR_VERSION}.${MPFR_MINOR_VERSION}.${MPFR_PATCHLEVEL_VERSION}) + + # Check whether found version exceeds minimum version + + if(${MPFR_VERSION} VERSION_LESS ${MPFR_FIND_VERSION}) + set(MPFR_VERSION_OK FALSE) + message(STATUS "MPFR version ${MPFR_VERSION} found in ${MPFR_INCLUDES}, " + "but at least version ${MPFR_FIND_VERSION} is required") + else() + set(MPFR_VERSION_OK TRUE) + endif() + +endif() + +# Set MPFR_LIBRARIES + +find_library(MPFR_LIBRARIES mpfr PATHS $ENV{GMPDIR} ${LIB_INSTALL_DIR}) + +# Epilogue + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MPFR DEFAULT_MSG + MPFR_INCLUDES MPFR_LIBRARIES MPFR_VERSION_OK) +mark_as_advanced(MPFR_INCLUDES MPFR_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindMPREAL.cmake b/external/eigen-3.4.0/cmake/FindMPREAL.cmake new file mode 100644 index 0000000..947a1ce --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindMPREAL.cmake @@ -0,0 +1,103 @@ +# Try to find the MPFR C++ (MPREAL) library +# See http://www.holoborodko.com/pavel/mpreal/ +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(MPREAL 1.8.6) +# to require version 1.8.6 or newer of MPREAL C++. +# +# Once done this will define +# +# MPREAL_FOUND - system has MPREAL lib with correct version +# MPREAL_INCLUDES - MPREAL required include directories +# MPREAL_LIBRARIES - MPREAL required libraries +# MPREAL_VERSION - MPREAL version + +# Copyright (c) 2020 The Eigen Authors. +# Redistribution and use is allowed according to the terms of the BSD license. 
+ +include(CMakeFindDependencyMacro) +find_dependency(MPFR) +find_dependency(GMP) + +# Set MPREAL_INCLUDES +find_path(MPREAL_INCLUDES + NAMES + mpreal.h + PATHS + $ENV{GMPDIR} + ${INCLUDE_INSTALL_DIR} +) + +# Set MPREAL_FIND_VERSION to 1.0.0 if no minimum version is specified + +if(NOT MPREAL_FIND_VERSION) + if(NOT MPREAL_FIND_VERSION_MAJOR) + set(MPREAL_FIND_VERSION_MAJOR 1) + endif() + if(NOT MPREAL_FIND_VERSION_MINOR) + set(MPREAL_FIND_VERSION_MINOR 0) + endif() + if(NOT MPREAL_FIND_VERSION_PATCH) + set(MPREAL_FIND_VERSION_PATCH 0) + endif() + + set(MPREAL_FIND_VERSION "${MPREAL_FIND_VERSION_MAJOR}.${MPREAL_FIND_VERSION_MINOR}.${MPREAL_FIND_VERSION_PATCH}") +endif() + +# Check bugs +# - https://github.com/advanpix/mpreal/issues/7 +# - https://github.com/advanpix/mpreal/issues/9 +set(MPREAL_TEST_PROGRAM " +#include +#include +int main(int argc, char** argv) { + const mpfr::mpreal one = 1.0; + const mpfr::mpreal zero = 0.0; + using namespace std; + const mpfr::mpreal smaller = min(one, zero); + return 0; +}") + +if(MPREAL_INCLUDES) + + # Set MPREAL_VERSION + + file(READ "${MPREAL_INCLUDES}/mpreal.h" _mpreal_version_header) + + string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MAJOR[ \t]+([0-9]+)" _mpreal_major_version_match "${_mpreal_version_header}") + set(MPREAL_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MINOR[ \t]+([0-9]+)" _mpreal_minor_version_match "${_mpreal_version_header}") + set(MPREAL_MINOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+MPREAL_VERSION_PATCHLEVEL[ \t]+([0-9]+)" _mpreal_patchlevel_version_match "${_mpreal_version_header}") + set(MPREAL_PATCHLEVEL_VERSION "${CMAKE_MATCH_1}") + + set(MPREAL_VERSION ${MPREAL_MAJOR_VERSION}.${MPREAL_MINOR_VERSION}.${MPREAL_PATCHLEVEL_VERSION}) + + # Check whether found version exceeds minimum version + + if(${MPREAL_VERSION} VERSION_LESS ${MPREAL_FIND_VERSION}) + set(MPREAL_VERSION_OK FALSE) + message(STATUS "MPREAL version ${MPREAL_VERSION} found in ${MPREAL_INCLUDES}, " + "but at least version ${MPREAL_FIND_VERSION} is required") + else() + set(MPREAL_VERSION_OK TRUE) + + list(APPEND MPREAL_INCLUDES "${MPFR_INCLUDES}" "${GMP_INCLUDES}") + list(REMOVE_DUPLICATES MPREAL_INCLUDES) + + list(APPEND MPREAL_LIBRARIES "${MPFR_LIBRARIES}" "${GMP_LIBRARIES}") + list(REMOVE_DUPLICATES MPREAL_LIBRARIES) + + # Make sure it compiles with the current compiler. + unset(MPREAL_WORKS CACHE) + include(CheckCXXSourceCompiles) + set(CMAKE_REQUIRED_INCLUDES "${MPREAL_INCLUDES}") + set(CMAKE_REQUIRED_LIBRARIES "${MPREAL_LIBRARIES}") + check_cxx_source_compiles("${MPREAL_TEST_PROGRAM}" MPREAL_WORKS) + endif() +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MPREAL DEFAULT_MSG + MPREAL_INCLUDES MPREAL_VERSION_OK MPREAL_WORKS) +mark_as_advanced(MPREAL_INCLUDES) diff --git a/external/eigen-3.4.0/cmake/FindMetis.cmake b/external/eigen-3.4.0/cmake/FindMetis.cmake new file mode 100644 index 0000000..747f882 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindMetis.cmake @@ -0,0 +1,265 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 
+# +### +# +# - Find METIS include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(METIS +# [REQUIRED] # Fail with error if metis is not found +# ) +# +# This module finds headers and metis library. +# Results are reported in variables: +# METIS_FOUND - True if headers and requested libraries were found +# METIS_INCLUDE_DIRS - metis include directories +# METIS_LIBRARY_DIRS - Link directories for metis libraries +# METIS_LIBRARIES - metis component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DMETIS_DIR=path/to/metis): +# METIS_DIR - Where to find the base directory of metis +# METIS_INCDIR - Where to find the header files +# METIS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: METIS_DIR, METIS_INCDIR, METIS_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT METIS_FOUND) + set(METIS_DIR "" CACHE PATH "Installation directory of METIS library") + if (NOT METIS_FIND_QUIETLY) + message(STATUS "A cache variable, namely METIS_DIR, has been set to specify the install directory of METIS") + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_METIS_DIR "$ENV{METIS_DIR}") +set(ENV_METIS_INCDIR "$ENV{METIS_INCDIR}") +if(ENV_METIS_INCDIR) + list(APPEND _inc_env "${ENV_METIS_INCDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _inc_env "${ENV_METIS_DIR}") + list(APPEND _inc_env "${ENV_METIS_DIR}/include") + list(APPEND _inc_env "${ENV_METIS_DIR}/include/metis") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the metis header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(METIS_INCDIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${METIS_INCDIR}) +else() + if(METIS_DIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES 
metis.h + HINTS ${METIS_DIR} + PATH_SUFFIXES "include" "include/metis") + else() + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${_inc_env}) + endif() +endif() +mark_as_advanced(METIS_metis.h_DIRS) + + +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis.h_DIRS) + set(METIS_INCLUDE_DIRS "${METIS_metis.h_DIRS}") +else () + set(METIS_INCLUDE_DIRS "METIS_INCLUDE_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- metis.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_METIS_LIBDIR "$ENV{METIS_LIBDIR}") +if(ENV_METIS_LIBDIR) + list(APPEND _lib_env "${ENV_METIS_LIBDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _lib_env "${ENV_METIS_DIR}") + list(APPEND _lib_env "${ENV_METIS_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the metis lib in the given paths +# ---------------------------------------------- +# call cmake macro to find the lib path +if(METIS_LIBDIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_LIBDIR}) +else() + if(METIS_DIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${_lib_env}) + endif() +endif() +mark_as_advanced(METIS_metis_LIBRARY) + + +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis_LIBRARY) + get_filename_component(metis_lib_path "${METIS_metis_LIBRARY}" PATH) + # set cmake variables + set(METIS_LIBRARIES "${METIS_metis_LIBRARY}") + set(METIS_LIBRARY_DIRS "${metis_lib_path}") +else () + set(METIS_LIBRARIES "METIS_LIBRARIES-NOTFOUND") + set(METIS_LIBRARY_DIRS "METIS_LIBRARY_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- lib metis not found") + endif() +endif () + +# check a function to validate the find +if(METIS_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # METIS + if (METIS_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + if (METIS_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${METIS_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${METIS_LIBRARIES}") + # m + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(METIS_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(METIS_NodeND 
METIS_WORKS) + mark_as_advanced(METIS_WORKS) + + if(NOT METIS_WORKS) + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for METIS : test of METIS_NodeND with METIS library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif() + +if (METIS_LIBRARIES) + list(GET METIS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(METIS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of METIS library" FORCE) + else() + set(METIS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of METIS library" FORCE) + endif() +endif() +mark_as_advanced(METIS_DIR) +mark_as_advanced(METIS_DIR_FOUND) + +# check that METIS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(METIS DEFAULT_MSG + METIS_LIBRARIES + METIS_WORKS + METIS_INCLUDE_DIRS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/external/eigen-3.4.0/cmake/FindPASTIX.cmake b/external/eigen-3.4.0/cmake/FindPASTIX.cmake new file mode 100644 index 0000000..db1427b --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindPASTIX.cmake @@ -0,0 +1,704 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PASTIX include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PASTIX +# [REQUIRED] # Fail with error if pastix is not found +# [COMPONENTS ...] # dependencies +# ) +# +# PASTIX depends on the following libraries: +# - Threads, m, rt +# - MPI +# - HWLOC +# - BLAS +# +# COMPONENTS are optional libraries PASTIX could be linked with, +# Use it to drive detection of a specific compilation chain +# COMPONENTS can be some of the following: +# - MPI: to activate detection of the parallel MPI version (default) +# it looks for Threads, HWLOC, BLAS, MPI and ScaLAPACK libraries +# - SEQ: to activate detection of the sequential version (exclude MPI version) +# - STARPU: to activate detection of StarPU version +# it looks for MPI version of StarPU (default behaviour) +# if SEQ and STARPU are given, it looks for a StarPU without MPI +# - STARPU_CUDA: to activate detection of StarPU with CUDA +# - STARPU_FXT: to activate detection of StarPU with FxT +# - SCOTCH: to activate detection of PASTIX linked with SCOTCH +# - PTSCOTCH: to activate detection of PASTIX linked with SCOTCH +# - METIS: to activate detection of PASTIX linked with SCOTCH +# +# This module finds headers and pastix library. 
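A minimal consumer-side sketch of the invocation documented above (the REQUIRED flag and the component pair are illustrative choices only; any of the components listed above can be combined):

  # hypothetical CMakeLists.txt fragment: MPI build of PaStiX ordered with Scotch
  find_package(PASTIX REQUIRED COMPONENTS MPI SCOTCH)

The result variables listed below can then be passed to include_directories()/target_link_libraries() as usual.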
+# Results are reported in variables: +# PASTIX_FOUND - True if headers and requested libraries were found +# PASTIX_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PASTIX_INCLUDE_DIRS - pastix include directories +# PASTIX_LIBRARY_DIRS - Link directories for pastix libraries +# PASTIX_LIBRARIES - pastix libraries +# PASTIX_INCLUDE_DIRS_DEP - pastix + dependencies include directories +# PASTIX_LIBRARY_DIRS_DEP - pastix + dependencies link directories +# PASTIX_LIBRARIES_DEP - pastix libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPASTIX_DIR=path/to/pastix): +# PASTIX_DIR - Where to find the base directory of pastix +# PASTIX_INCDIR - Where to find the header files +# PASTIX_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PASTIX_DIR, PASTIX_INCDIR, PASTIX_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + + +if (NOT PASTIX_FOUND) + set(PASTIX_DIR "" CACHE PATH "Installation directory of PASTIX library") + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "A cache variable, namely PASTIX_DIR, has been set to specify the install directory of PASTIX") + endif() +endif() + +# Set the version to find +set(PASTIX_LOOK_FOR_MPI ON) +set(PASTIX_LOOK_FOR_SEQ OFF) +set(PASTIX_LOOK_FOR_STARPU OFF) +set(PASTIX_LOOK_FOR_STARPU_CUDA OFF) +set(PASTIX_LOOK_FOR_STARPU_FXT OFF) +set(PASTIX_LOOK_FOR_SCOTCH ON) +set(PASTIX_LOOK_FOR_PTSCOTCH OFF) +set(PASTIX_LOOK_FOR_METIS OFF) + +if( PASTIX_FIND_COMPONENTS ) + foreach( component ${PASTIX_FIND_COMPONENTS} ) + if (${component} STREQUAL "SEQ") + # means we look for the sequential version of PaStiX (without MPI) + set(PASTIX_LOOK_FOR_SEQ ON) + set(PASTIX_LOOK_FOR_MPI OFF) + endif() + if (${component} STREQUAL "MPI") + # means we look for the MPI version of PaStiX (default) + set(PASTIX_LOOK_FOR_SEQ OFF) + set(PASTIX_LOOK_FOR_MPI ON) + endif() + if (${component} STREQUAL "STARPU") + # means we look for PaStiX with StarPU + set(PASTIX_LOOK_FOR_STARPU ON) + endif() + if (${component} STREQUAL "STARPU_CUDA") + # means we look for PaStiX with StarPU + CUDA + set(PASTIX_LOOK_FOR_STARPU ON) + set(PASTIX_LOOK_FOR_STARPU_CUDA ON) + endif() + if (${component} STREQUAL "STARPU_FXT") + # means we look for PaStiX with StarPU + FxT + set(PASTIX_LOOK_FOR_STARPU_FXT ON) + endif() + if (${component} STREQUAL "SCOTCH") + set(PASTIX_LOOK_FOR_SCOTCH ON) + endif() + if (${component} STREQUAL "PTSCOTCH") + set(PASTIX_LOOK_FOR_PTSCOTCH ON) + endif() + if (${component} STREQUAL "METIS") + set(PASTIX_LOOK_FOR_METIS ON) + endif() + endforeach() +endif() + +# Dependencies detection +# ---------------------- + + +# Required dependencies +# 
--------------------- +include(CMakeFindDependencyMacro) +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect pthread") +endif() +if (PASTIX_FIND_REQUIRED) + find_dependency(Threads REQUIRED QUIET) +else() + find_dependency(Threads QUIET) +endif() +set(PASTIX_EXTRA_LIBRARIES "") +if( THREADS_FOUND ) + list(APPEND PASTIX_EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) +endif () + +# Add math library to the list of extra +# it normally exists on all common systems provided with a C compiler +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect libm") +endif() +set(PASTIX_M_LIBRARIES "") +if(UNIX OR WIN32) + find_library( + PASTIX_M_m_LIBRARY + NAMES m + ) + mark_as_advanced(PASTIX_M_m_LIBRARY) + if (PASTIX_M_m_LIBRARY) + list(APPEND PASTIX_M_LIBRARIES "${PASTIX_M_m_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_M_m_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find libm on your system." + "Are you sure to a have a C compiler installed?") + endif() + endif() +endif() + +# Try to find librt (libposix4 - POSIX.1b Realtime Extensions library) +# on Unix systems except Apple ones because it does not exist on it +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect librt") +endif() +set(PASTIX_RT_LIBRARIES "") +if(UNIX AND NOT APPLE) + find_library( + PASTIX_RT_rt_LIBRARY + NAMES rt + ) + mark_as_advanced(PASTIX_RT_rt_LIBRARY) + if (PASTIX_RT_rt_LIBRARY) + list(APPEND PASTIX_RT_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find librt on your system") + endif() + endif() +endif() + +# PASTIX depends on HWLOC +#------------------------ +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect HWLOC") +endif() +if (PASTIX_FIND_REQUIRED) + find_dependency(HWLOC REQUIRED QUIET) +else() + find_dependency(HWLOC QUIET) +endif() + +# PASTIX depends on BLAS +#----------------------- +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect BLAS") +endif() +if (PASTIX_FIND_REQUIRED) + find_dependency(BLASEXT REQUIRED QUIET) +else() + find_dependency(BLASEXT QUIET) +endif() + +# Optional dependencies +# --------------------- + +# PASTIX may depend on MPI +#------------------------- +if (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect MPI") + endif() + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_MPI) + find_dependency(MPI REQUIRED QUIET) + else() + find_dependency(MPI QUIET) + endif() + if (MPI_FOUND) + mark_as_advanced(MPI_LIBRARY) + mark_as_advanced(MPI_EXTRA_LIBRARY) + endif() +endif () + +# PASTIX may depend on STARPU +#---------------------------- +if( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) + + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect StarPU") + endif() + + set(PASTIX_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired") + + # create list of components in order to make a single call to find_package(starpu...) 
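For illustration only: with the default MPI mode plus the STARPU_CUDA component, the component list assembled below ends up as "HWLOC;MPI;CUDA", so the single lookup performed further down is roughly

  find_dependency(STARPU 1.1 COMPONENTS HWLOC MPI CUDA)

where 1.1 is the default PASTIX_STARPU_VERSION set above.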
+ # we explicitly need a StarPU version built with hwloc + set(STARPU_COMPONENT_LIST "HWLOC") + + # StarPU may depend on MPI + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if (PASTIX_LOOK_FOR_MPI) + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + list(APPEND STARPU_COMPONENT_LIST "MPI") + endif() + if (PASTIX_LOOK_FOR_STARPU_CUDA) + list(APPEND STARPU_COMPONENT_LIST "CUDA") + endif() + if (PASTIX_LOOK_FOR_STARPU_FXT) + list(APPEND STARPU_COMPONENT_LIST "FXT") + endif() + # set the list of optional dependencies we may discover + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_STARPU) + find_dependency(STARPU ${PASTIX_STARPU_VERSION} REQUIRED + COMPONENTS ${STARPU_COMPONENT_LIST}) + else() + find_dependency(STARPU ${PASTIX_STARPU_VERSION} + COMPONENTS ${STARPU_COMPONENT_LIST}) + endif() + +endif() + +# PASTIX may depends on SCOTCH +#----------------------------- +if (NOT SCOTCH_FOUND AND PASTIX_LOOK_FOR_SCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect SCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_SCOTCH) + find_dependency(SCOTCH REQUIRED QUIET) + else() + find_dependency(SCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on PTSCOTCH +#------------------------------- +if (NOT PTSCOTCH_FOUND AND PASTIX_LOOK_FOR_PTSCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect PTSCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_PTSCOTCH) + find_dependency(PTSCOTCH REQUIRED QUIET) + else() + find_dependency(PTSCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on METIS +#---------------------------- +if (NOT METIS_FOUND AND PASTIX_LOOK_FOR_METIS) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect METIS") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_METIS) + find_dependency(METIS REQUIRED QUIET) + else() + find_dependency(METIS QUIET) + endif() +endif() + +# Error if pastix required and no partitioning lib found +if (PASTIX_FIND_REQUIRED AND NOT SCOTCH_FOUND AND NOT PTSCOTCH_FOUND AND NOT METIS_FOUND) + message(FATAL_ERROR "Could NOT find any partitioning library on your system" + " (install scotch, ptscotch or metis)") +endif() + + +# Looking for PaStiX +# ------------------ + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PASTIX_DIR "$ENV{PASTIX_DIR}") +set(ENV_PASTIX_INCDIR "$ENV{PASTIX_INCDIR}") +if(ENV_PASTIX_INCDIR) + list(APPEND _inc_env "${ENV_PASTIX_INCDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _inc_env "${ENV_PASTIX_DIR}") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include/pastix") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES 
_inc_env) + + +# Try to find the pastix header in the given paths +# --------------------------------------------------- +# call cmake macro to find the header path +if(PASTIX_INCDIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_INCDIR}) +else() + if(PASTIX_DIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_DIR} + PATH_SUFFIXES "include" "include/pastix") + else() + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${_inc_env} + PATH_SUFFIXES "pastix") + endif() +endif() +mark_as_advanced(PASTIX_pastix.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (PASTIX_pastix.h_DIRS) + set(PASTIX_INCLUDE_DIRS "${PASTIX_pastix.h_DIRS}") +else () + set(PASTIX_INCLUDE_DIRS "PASTIX_INCLUDE_DIRS-NOTFOUND") + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for pastix -- pastix.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PASTIX_LIBDIR "$ENV{PASTIX_LIBDIR}") +if(ENV_PASTIX_LIBDIR) + list(APPEND _lib_env "${ENV_PASTIX_LIBDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _lib_env "${ENV_PASTIX_DIR}") + list(APPEND _lib_env "${ENV_PASTIX_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the pastix lib in the given paths +# ------------------------------------------------ + +# create list of libs to find +set(PASTIX_libs_to_find "pastix_murge;pastix") + +# call cmake macro to find the lib path +if(PASTIX_LIBDIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_LIBDIR}) + endforeach() +else() + if(PASTIX_DIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(pastix_lib ${PASTIX_libs_to_find}) + + get_filename_component(${pastix_lib}_lib_path ${PASTIX_${pastix_lib}_LIBRARY} PATH) + # set cmake variables (respects naming convention) + if (PASTIX_LIBRARIES) + list(APPEND PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + else() + set(PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + endif() + if (PASTIX_LIBRARY_DIRS) + list(APPEND PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + else() + set(PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + endif() + mark_as_advanced(PASTIX_${pastix_lib}_LIBRARY) + +endforeach() + +# check 
a function to validate the find +if(PASTIX_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # PASTIX + if (PASTIX_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PASTIX_INCLUDE_DIRS}") + endif() + foreach(libdir ${PASTIX_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + set(REQUIRED_LIBS "${PASTIX_LIBRARIES}") + # STARPU + if (PASTIX_LOOK_FOR_STARPU AND STARPU_FOUND) + if (STARPU_INCLUDE_DIRS_DEP) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS_DEP}") + elseif (STARPU_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS}") + endif() + if(STARPU_LIBRARY_DIRS_DEP) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS_DEP}") + elseif(STARPU_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS}") + endif() + if (STARPU_LIBRARIES_DEP) + list(APPEND REQUIRED_LIBS "${STARPU_LIBRARIES_DEP}") + elseif (STARPU_LIBRARIES) + foreach(lib ${STARPU_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + endif() + # CUDA + if (PASTIX_LOOK_FOR_STARPU_CUDA AND CUDA_FOUND) + if (CUDA_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${CUDA_INCLUDE_DIRS}") + endif() + foreach(libdir ${CUDA_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES}") + endif() + # MPI + if (PASTIX_LOOK_FOR_MPI AND MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND REQUIRED_INCDIRS "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # HWLOC + if (HWLOC_FOUND) + if (HWLOC_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + foreach(libdir ${HWLOC_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + foreach(lib ${HWLOC_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + # BLAS + if (BLAS_FOUND) + if (BLAS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") + endif() + foreach(libdir ${BLAS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") + if (BLAS_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") + endif() + endif() + # SCOTCH + if (PASTIX_LOOK_FOR_SCOTCH AND SCOTCH_FOUND) + if (SCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${SCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + endif() + # PTSCOTCH + if (PASTIX_LOOK_FOR_PTSCOTCH AND PTSCOTCH_FOUND) + if (PTSCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${PTSCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + endif() + # METIS + if (PASTIX_LOOK_FOR_METIS AND METIS_FOUND) + if (METIS_INCLUDE_DIRS) + list(APPEND 
REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + foreach(libdir ${METIS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${METIS_LIBRARIES}") + endif() + # Fortran + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # EXTRA LIBS such that pthread, m, rt + list(APPEND REQUIRED_LIBS ${PASTIX_EXTRA_LIBRARIES}) + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PASTIX_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(pastix PASTIX_WORKS) + mark_as_advanced(PASTIX_WORKS) + + if(PASTIX_WORKS) + # save link with dependencies + set(PASTIX_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PASTIX_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PASTIX_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PASTIX_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PASTIX_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_LINKER_FLAGS) + else() + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX : test of pastix() fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + message(STATUS "Maybe PASTIX is linked with specific libraries. " + "Have you tried with COMPONENTS (MPI/SEQ, STARPU, STARPU_CUDA, SCOTCH, PTSCOTCH, METIS)? 
" + "See the explanation in FindPASTIX.cmake.") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif() + +if (PASTIX_LIBRARIES) + list(GET PASTIX_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PASTIX_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PASTIX library" FORCE) + else() + set(PASTIX_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PASTIX library" FORCE) + endif() +endif() +mark_as_advanced(PASTIX_DIR) +mark_as_advanced(PASTIX_DIR_FOUND) + +# check that PASTIX has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PASTIX DEFAULT_MSG + PASTIX_LIBRARIES + PASTIX_WORKS) diff --git a/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake b/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake new file mode 100644 index 0000000..6ccc743 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake @@ -0,0 +1,422 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PTSCOTCH include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PTSCOTCH +# [REQUIRED] # Fail with error if ptscotch is not found +# [COMPONENTS ...] # dependencies +# ) +# +# PTSCOTCH depends on the following libraries: +# - Threads +# - MPI +# +# COMPONENTS can be some of the following: +# - ESMUMPS: to activate detection of PT-Scotch with the esmumps interface +# +# This module finds headers and ptscotch library. +# Results are reported in variables: +# PTSCOTCH_FOUND - True if headers and requested libraries were found +# PTSCOTCH_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PTSCOTCH_INCLUDE_DIRS - ptscotch include directories +# PTSCOTCH_LIBRARY_DIRS - Link directories for ptscotch libraries +# PTSCOTCH_LIBRARIES - ptscotch component libraries to be linked +# PTSCOTCH_INCLUDE_DIRS_DEP - ptscotch + dependencies include directories +# PTSCOTCH_LIBRARY_DIRS_DEP - ptscotch + dependencies link directories +# PTSCOTCH_LIBRARIES_DEP - ptscotch libraries + dependencies +# PTSCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPTSCOTCH=path/to/ptscotch): +# PTSCOTCH_DIR - Where to find the base directory of ptscotch +# PTSCOTCH_INCDIR - Where to find the header files +# PTSCOTCH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PTSCOTCH_DIR, PTSCOTCH_INCDIR, PTSCOTCH_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. 
+# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT PTSCOTCH_FOUND) + set(PTSCOTCH_DIR "" CACHE PATH "Installation directory of PTSCOTCH library") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely PTSCOTCH_DIR, has been set to specify the install directory of PTSCOTCH") + endif() +endif() + +# Set the version to find +set(PTSCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( PTSCOTCH_FIND_COMPONENTS ) + foreach( component ${PTSCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(PTSCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + +# PTSCOTCH depends on Threads, try to find it +include(CMakeFindDependencyMacro) +if (NOT THREADS_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_dependency(Threads REQUIRED) + else() + find_dependency(Threads) + endif() +endif() + +# PTSCOTCH depends on MPI, try to find it +if (NOT MPI_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_dependency(MPI REQUIRED) + else() + find_dependency(MPI) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PTSCOTCH_DIR "$ENV{PTSCOTCH_DIR}") +set(ENV_PTSCOTCH_INCDIR "$ENV{PTSCOTCH_INCDIR}") +if(ENV_PTSCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_INCDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include/ptscotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the ptscotch header in the given paths +# ------------------------------------------------- + +set(PTSCOTCH_hdrs_to_find "ptscotch.h;scotch.h") + +# call cmake macro to find the header path +if(PTSCOTCH_INCDIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_INCDIR}) + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + else() + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + 
find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + if (PTSCOTCH_${ptscotch_hdr}_DIRS) + list(APPEND PTSCOTCH_INCLUDE_DIRS "${PTSCOTCH_${ptscotch_hdr}_DIRS}") + else () + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- ${ptscotch_hdr} not found") + endif() + endif() +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PTSCOTCH_LIBDIR "$ENV{PTSCOTCH_LIBDIR}") +if(ENV_PTSCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_LIBDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the ptscotch lib in the given paths +# ---------------------------------------------- + +set(PTSCOTCH_libs_to_find "ptscotch;ptscotcherr") +if (PTSCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT PTSCOTCH_libs_to_find 0 "ptesmumps") + list(APPEND PTSCOTCH_libs_to_find "esmumps" ) +endif() +list(APPEND PTSCOTCH_libs_to_find "scotch;scotcherr") + +# call cmake macro to find the lib path +if(PTSCOTCH_LIBDIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_LIBDIR}) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(PTSCOTCH_LIBRARIES "") +set(PTSCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + + if (PTSCOTCH_${ptscotch_lib}_LIBRARY) + get_filename_component(${ptscotch_lib}_lib_path "${PTSCOTCH_${ptscotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") + list(APPEND PTSCOTCH_LIBRARY_DIRS "${${ptscotch_lib}_lib_path}") + else () + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- lib ${ptscotch_lib} not found") + endif() + endif () + + mark_as_advanced(PTSCOTCH_${ptscotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(PTSCOTCH_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + 
set(REQUIRED_LIBS) + + # PTSCOTCH + if (PTSCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + if (PTSCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${PTSCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + # MPI + if (MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND CMAKE_REQUIRED_INCLUDES "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PTSCOTCH_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(SCOTCH_dgraphInit PTSCOTCH_WORKS) + mark_as_advanced(PTSCOTCH_WORKS) + + if(PTSCOTCH_WORKS) + # save link with dependencies + set(PTSCOTCH_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PTSCOTCH_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PTSCOTCH_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PTSCOTCH_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_LINKER_FLAGS) + else() + if(NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for PTSCOTCH : test of SCOTCH_dgraphInit with PTSCOTCH library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif() + +if (PTSCOTCH_LIBRARIES) + list(GET PTSCOTCH_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PTSCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + else() + set(PTSCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + endif() +endif() +mark_as_advanced(PTSCOTCH_DIR) +mark_as_advanced(PTSCOTCH_DIR_FOUND) + +# Check the size of SCOTCH_Num +# --------------------------------- +set(CMAKE_REQUIRED_INCLUDES ${PTSCOTCH_INCLUDE_DIRS}) + +include(CheckCSourceRuns) 
+#stdio.h and stdint.h should be included by scotch.h directly
+set(PTSCOTCH_C_TEST_SCOTCH_Num_4 "
+#include <stdio.h>
+#include <stdint.h>
+#include <ptscotch.h>
+int main(int argc, char **argv) {
+  if (sizeof(SCOTCH_Num) == 4)
+    return 0;
+  else
+    return 1;
+}
+")
+
+set(PTSCOTCH_C_TEST_SCOTCH_Num_8 "
+#include <stdio.h>
+#include <stdint.h>
+#include <ptscotch.h>
+int main(int argc, char **argv) {
+  if (sizeof(SCOTCH_Num) == 8)
+    return 0;
+  else
+    return 1;
+}
+")
+check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_4}" PTSCOTCH_Num_4)
+if(NOT PTSCOTCH_Num_4)
+  check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_8}" PTSCOTCH_Num_8)
+  if(NOT PTSCOTCH_Num_8)
+    set(PTSCOTCH_INTSIZE -1)
+  else()
+    set(PTSCOTCH_INTSIZE 8)
+  endif()
+else()
+  set(PTSCOTCH_INTSIZE 4)
+endif()
+set(CMAKE_REQUIRED_INCLUDES "")
+
+# check that PTSCOTCH has been found
+# ---------------------------------
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PTSCOTCH DEFAULT_MSG
+  PTSCOTCH_LIBRARIES
+  PTSCOTCH_WORKS)
+#
+# TODO: Add possibility to check for specific functions in the library
+#
diff --git a/external/eigen-3.4.0/cmake/FindSCOTCH.cmake b/external/eigen-3.4.0/cmake/FindSCOTCH.cmake
new file mode 100644
index 0000000..11b971a
--- /dev/null
+++ b/external/eigen-3.4.0/cmake/FindSCOTCH.cmake
@@ -0,0 +1,370 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+# of Tennessee Research Foundation.
+# All rights reserved.
+# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+# - Find SCOTCH include dirs and libraries
+# Use this module by invoking find_package with the form:
+# find_package(SCOTCH
+# [REQUIRED] # Fail with error if scotch is not found
+# [COMPONENTS ...] # dependencies
+# )
+#
+# COMPONENTS can be some of the following:
+# - ESMUMPS: to activate detection of Scotch with the esmumps interface
+#
+# This module finds headers and scotch library.
+# Results are reported in variables:
+# SCOTCH_FOUND - True if headers and requested libraries were found
+# SCOTCH_INCLUDE_DIRS - scotch include directories
+# SCOTCH_LIBRARY_DIRS - Link directories for scotch libraries
+# SCOTCH_LIBRARIES - scotch component libraries to be linked
+# SCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num
+#
+# The user can give specific paths where to find the libraries adding cmake
+# options at configure (ex: cmake path/to/project -DSCOTCH=path/to/scotch):
+# SCOTCH_DIR - Where to find the base directory of scotch
+# SCOTCH_INCDIR - Where to find the header files
+# SCOTCH_LIBDIR - Where to find the library files
+# The module can also look for the following environment variables if paths
+# are not given as cmake variable: SCOTCH_DIR, SCOTCH_INCDIR, SCOTCH_LIBDIR
+
+#=============================================================================
+# Copyright 2012-2013 Inria
+# Copyright 2012-2013 Emmanuel Agullo
+# Copyright 2012-2013 Mathieu Faverge
+# Copyright 2012 Cedric Castagnede
+# Copyright 2013 Florent Pruvost
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file MORSE-Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT SCOTCH_FOUND) + set(SCOTCH_DIR "" CACHE PATH "Installation directory of SCOTCH library") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely SCOTCH_DIR, has been set to specify the install directory of SCOTCH") + endif() +endif() + +# Set the version to find +set(SCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( SCOTCH_FIND_COMPONENTS ) + foreach( component ${SCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(SCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + +# SCOTCH may depend on Threads, try to find it +include(CMakeFindDependencyMacro) +if (NOT THREADS_FOUND) + if (SCOTCH_FIND_REQUIRED) + find_dependency(Threads REQUIRED) + else() + find_dependency(Threads) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_SCOTCH_DIR "$ENV{SCOTCH_DIR}") +set(ENV_SCOTCH_INCDIR "$ENV{SCOTCH_INCDIR}") +if(ENV_SCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_SCOTCH_INCDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _inc_env "${ENV_SCOTCH_DIR}") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include/scotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the scotch header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(SCOTCH_INCDIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_INCDIR}) +else() + if(SCOTCH_DIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + else() + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + endif() +endif() +mark_as_advanced(SCOTCH_scotch.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (SCOTCH_scotch.h_DIRS) + set(SCOTCH_INCLUDE_DIRS "${SCOTCH_scotch.h_DIRS}") +else () + set(SCOTCH_INCLUDE_DIRS "SCOTCH_INCLUDE_DIRS-NOTFOUND") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- scotch.h not found") + endif() +endif() +list(REMOVE_DUPLICATES SCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_SCOTCH_LIBDIR "$ENV{SCOTCH_LIBDIR}") +if(ENV_SCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_SCOTCH_LIBDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _lib_env "${ENV_SCOTCH_DIR}") + 
list(APPEND _lib_env "${ENV_SCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the scotch lib in the given paths +# ---------------------------------------------- + +set(SCOTCH_libs_to_find "scotch;scotcherrexit") +if (SCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT SCOTCH_libs_to_find 0 "esmumps") +endif() + +# call cmake macro to find the lib path +if(SCOTCH_LIBDIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_LIBDIR}) + endforeach() +else() + if(SCOTCH_DIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(SCOTCH_LIBRARIES "") +set(SCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(scotch_lib ${SCOTCH_libs_to_find}) + + if (SCOTCH_${scotch_lib}_LIBRARY) + get_filename_component(${scotch_lib}_lib_path "${SCOTCH_${scotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + list(APPEND SCOTCH_LIBRARY_DIRS "${${scotch_lib}_lib_path}") + else () + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- lib ${scotch_lib} not found") + endif() + endif () + + mark_as_advanced(SCOTCH_${scotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES SCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(SCOTCH_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # SCOTCH + if (SCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + if (SCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${SCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + 
string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}")
+
+  # test link
+  unset(SCOTCH_WORKS CACHE)
+  include(CheckFunctionExists)
+  check_function_exists(SCOTCH_graphInit SCOTCH_WORKS)
+  mark_as_advanced(SCOTCH_WORKS)
+
+  if(SCOTCH_WORKS)
+    # save link with dependencies
+    set(SCOTCH_LIBRARIES "${REQUIRED_LIBS}")
+  else()
+    if(NOT SCOTCH_FIND_QUIETLY)
+      message(STATUS "Looking for SCOTCH : test of SCOTCH_graphInit with SCOTCH library fails")
+      message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}")
+      message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
+      message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
+    endif()
+  endif()
+  set(CMAKE_REQUIRED_INCLUDES)
+  set(CMAKE_REQUIRED_FLAGS)
+  set(CMAKE_REQUIRED_LIBRARIES)
+endif()
+
+if (SCOTCH_LIBRARIES)
+  list(GET SCOTCH_LIBRARIES 0 first_lib)
+  get_filename_component(first_lib_path "${first_lib}" PATH)
+  if (${first_lib_path} MATCHES "/lib(32|64)?$")
+    string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}")
+    set(SCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of SCOTCH library" FORCE)
+  else()
+    set(SCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of SCOTCH library" FORCE)
+  endif()
+endif()
+mark_as_advanced(SCOTCH_DIR)
+mark_as_advanced(SCOTCH_DIR_FOUND)
+
+# Check the size of SCOTCH_Num
+# ---------------------------------
+set(CMAKE_REQUIRED_INCLUDES ${SCOTCH_INCLUDE_DIRS})
+
+include(CheckCSourceRuns)
+#stdio.h and stdint.h should be included by scotch.h directly
+set(SCOTCH_C_TEST_SCOTCH_Num_4 "
+#include <stdio.h>
+#include <stdint.h>
+#include <scotch.h>
+int main(int argc, char **argv) {
+  if (sizeof(SCOTCH_Num) == 4)
+    return 0;
+  else
+    return 1;
+}
+")
+
+set(SCOTCH_C_TEST_SCOTCH_Num_8 "
+#include <stdio.h>
+#include <stdint.h>
+#include <scotch.h>
+int main(int argc, char **argv) {
+  if (sizeof(SCOTCH_Num) == 8)
+    return 0;
+  else
+    return 1;
+}
+")
+check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_4}" SCOTCH_Num_4)
+if(NOT SCOTCH_Num_4)
+  check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_8}" SCOTCH_Num_8)
+  if(NOT SCOTCH_Num_8)
+    set(SCOTCH_INTSIZE -1)
+  else()
+    set(SCOTCH_INTSIZE 8)
+  endif()
+else()
+  set(SCOTCH_INTSIZE 4)
+endif()
+set(CMAKE_REQUIRED_INCLUDES "")
+
+# check that SCOTCH has been found
+# ---------------------------------
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(SCOTCH DEFAULT_MSG
+  SCOTCH_LIBRARIES
+  SCOTCH_WORKS)
+#
+# TODO: Add possibility to check for specific functions in the library
+#
diff --git a/external/eigen-3.4.0/cmake/FindSPQR.cmake b/external/eigen-3.4.0/cmake/FindSPQR.cmake
new file mode 100644
index 0000000..d6fb2e1
--- /dev/null
+++ b/external/eigen-3.4.0/cmake/FindSPQR.cmake
@@ -0,0 +1,41 @@
+# SPQR lib usually requires linking to a blas and lapack library.
+# It is up to the user of this module to find a BLAS and link to it.
+
+# SPQR lib requires Cholmod, colamd and amd as well.
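A minimal usage sketch for this module (the target name is hypothetical; as stated above, BLAS/LAPACK resolution is left to the caller):

  find_package(BLAS REQUIRED)
  find_package(LAPACK REQUIRED)
  find_package(SPQR REQUIRED)
  target_include_directories(my_solver PRIVATE ${SPQR_INCLUDES})
  target_link_libraries(my_solver PRIVATE ${SPQR_LIBRARIES} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})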
+# FindCholmod.cmake can be used to find those packages before finding spqr
+
+if (SPQR_INCLUDES AND SPQR_LIBRARIES)
+  set(SPQR_FIND_QUIETLY TRUE)
+endif ()
+
+find_path(SPQR_INCLUDES
+  NAMES
+  SuiteSparseQR.hpp
+  PATHS
+  $ENV{SPQRDIR}
+  ${INCLUDE_INSTALL_DIR}
+  PATH_SUFFIXES
+  suitesparse
+  ufsparse
+)
+
+find_library(SPQR_LIBRARIES spqr $ENV{SPQRDIR} ${LIB_INSTALL_DIR})
+
+if(SPQR_LIBRARIES)
+
+  find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS $ENV{SPQRDIR} ${LIB_INSTALL_DIR})
+  if (SUITESPARSE_LIBRARY)
+    set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${SUITESPARSE_LIBRARY})
+  endif()
+
+  find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
+  if(CHOLMOD_LIBRARY)
+    set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARY})
+  endif()
+
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(SPQR DEFAULT_MSG SPQR_INCLUDES SPQR_LIBRARIES)
+
+mark_as_advanced(SPQR_INCLUDES SPQR_LIBRARIES)
\ No newline at end of file
diff --git a/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake b/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake
new file mode 100644
index 0000000..1d1e5b3
--- /dev/null
+++ b/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake
@@ -0,0 +1,70 @@
+# - Try to find how to link to the standard math library, if anything at all is needed to do.
+# On most platforms this is automatic, but for example it's not automatic on QNX.
+#
+# Once done this will define
+#
+# STANDARD_MATH_LIBRARY_FOUND - we found how to successfully link to the standard math library
+# STANDARD_MATH_LIBRARY - the name of the standard library that one has to link to.
+# -- this will be left empty if it's automatic (most platforms).
+# -- this will be set to "m" on platforms where one must explicitly
+# pass the "-lm" linker flag.
+#
+# Copyright (c) 2010 Benoit Jacob
+# 2020 Susi Lehtola
+# Redistribution and use is allowed according to the terms of the 2-clause BSD license.
+
+
+include(CheckCXXSourceCompiles)
+
+# a little test program for c++ math functions.
+# notice the std:: is required on some platforms such as QNX
+# notice the (void) is required if -Wall (-Wunused-value) is added to CMAKE_CXX_FLAG
+
+# We read in the arguments from standard input to avoid the compiler optimizing away the calls
+set(find_standard_math_library_test_program
+"
+#include<cmath>
+int main(int argc, char **){
+  return int(std::sin(double(argc)) + std::log(double(argc)));
+}")
+
+# first try compiling/linking the test program without any linker flags
+
+set(CMAKE_REQUIRED_FLAGS "")
+set(CMAKE_REQUIRED_LIBRARIES "")
+CHECK_CXX_SOURCE_COMPILES(
+  "${find_standard_math_library_test_program}"
+  standard_math_library_linked_to_automatically
+)
+
+if(standard_math_library_linked_to_automatically)
+
+  # the test program linked successfully without any linker flag.
+  set(STANDARD_MATH_LIBRARY "")
+  set(STANDARD_MATH_LIBRARY_FOUND TRUE)
+
+else()
+
+  # the test program did not link successfully without any linker flag.
+  # This is a very uncommon case that so far we only saw on QNX. The next try is the
+  # standard name 'm' for the standard math library.
+
+  set(CMAKE_REQUIRED_LIBRARIES "m")
+  CHECK_CXX_SOURCE_COMPILES(
+    "${find_standard_math_library_test_program}"
+    standard_math_library_linked_to_as_m)
+
+  if(standard_math_library_linked_to_as_m)
+
+    # the test program linked successfully when linking to the 'm' library
+    set(STANDARD_MATH_LIBRARY "m")
+    set(STANDARD_MATH_LIBRARY_FOUND TRUE)
+
+  else()
+
+    # the test program still doesn't link successfully
+    set(STANDARD_MATH_LIBRARY_FOUND FALSE)
+
+  endif()
+
+endif()
diff --git a/external/eigen-3.4.0/cmake/FindSuperLU.cmake b/external/eigen-3.4.0/cmake/FindSuperLU.cmake
new file mode 100644
index 0000000..4b779f5
--- /dev/null
+++ b/external/eigen-3.4.0/cmake/FindSuperLU.cmake
@@ -0,0 +1,97 @@
+
+# Umfpack lib usually requires linking to a blas library.
+# It is up to the user of this module to find a BLAS and link to it.
+
+if (SUPERLU_INCLUDES AND SUPERLU_LIBRARIES)
+  set(SUPERLU_FIND_QUIETLY TRUE)
+endif ()
+
+find_path(SUPERLU_INCLUDES
+  NAMES
+  supermatrix.h
+  PATHS
+  $ENV{SUPERLUDIR}
+  ${INCLUDE_INSTALL_DIR}
+  PATH_SUFFIXES
+  superlu
+  SRC
+)
+
+find_library(SUPERLU_LIBRARIES
+  NAMES "superlu_5.2.1" "superlu_5.2" "superlu_5.1.1" "superlu_5.1" "superlu_5.0" "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu"
+  PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR}
+  PATH_SUFFIXES lib)
+
+if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES)
+
+include(CheckCXXSourceCompiles)
+include(CMakePushCheckState)
+cmake_push_check_state()
+
+set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${SUPERLU_INCLUDES})
+
+# check whether struct mem_usage_t is globally defined
+check_cxx_source_compiles("
+typedef int int_t;
+#include <supermatrix.h>
+#include <slu_util.h>
+int main() {
+  mem_usage_t mem;
+  return 0;
+}"
+SUPERLU_HAS_GLOBAL_MEM_USAGE_T)
+
+
+check_cxx_source_compiles("
+typedef int int_t;
+#include <supermatrix.h>
+#include <superlu_enum_consts.h>
+int main() {
+  return SLU_SINGLE;
+}"
+SUPERLU_HAS_CLEAN_ENUMS)
+
+check_cxx_source_compiles("
+typedef int int_t;
+#include <supermatrix.h>
+#include <slu_ddefs.h>
+int main(void)
+{
+  GlobalLU_t glu;
+  return 0;
+}"
+SUPERLU_HAS_GLOBALLU_T)
+
+if(SUPERLU_HAS_GLOBALLU_T)
+  # at least 5.0
+  set(SUPERLU_VERSION_VAR "5.0")
+elseif(SUPERLU_HAS_CLEAN_ENUMS)
+  # at least 4.3
+  set(SUPERLU_VERSION_VAR "4.3")
+elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T)
+  # at least 4.0
+  set(SUPERLU_VERSION_VAR "4.0")
+else()
+  set(SUPERLU_VERSION_VAR "3.0")
+endif()
+
+cmake_pop_check_state()
+
+if(SuperLU_FIND_VERSION)
+  if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION})
+    set(SUPERLU_VERSION_OK FALSE)
+  else()
+    set(SUPERLU_VERSION_OK TRUE)
+  endif()
+else()
+  set(SUPERLU_VERSION_OK TRUE)
+endif()
+
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(SuperLU
+  REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK
+  VERSION_VAR SUPERLU_VERSION_VAR)
+
+mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES)
diff --git a/external/eigen-3.4.0/cmake/FindTriSYCL.cmake b/external/eigen-3.4.0/cmake/FindTriSYCL.cmake
new file mode 100644
index 0000000..8104239
--- /dev/null
+++ b/external/eigen-3.4.0/cmake/FindTriSYCL.cmake
@@ -0,0 +1,173 @@
+#.rst:
+# FindTriSYCL
+#---------------
+#
+# TODO : insert Copyright and licence
+
+#########################
+# FindTriSYCL.cmake
+#########################
+#
+# Tools for finding and building with TriSYCL.
+#
+# User must define TRISYCL_INCLUDE_DIR pointing to the triSYCL
+# include directory.
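A minimal sketch of supplying that variable at configure time, in the same style as the other modules in this series (the install path is hypothetical):

  cmake path/to/project -DTRISYCL_INCLUDE_DIR=/opt/triSYCL/include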
+# +# Latest version of this file can be found at: +# https://github.com/triSYCL/triSYCL + +# Requite CMake version 3.5 or higher +cmake_minimum_required (VERSION 3.5) + +# Check that a supported host compiler can be found +if(CMAKE_COMPILER_IS_GNUCXX) + # Require at least gcc 5.4 + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4) + message(FATAL_ERROR + "host compiler - Not found! (gcc version must be at least 5.4)") + else() + message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # Require at least clang 3.9 + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.9) + message(FATAL_ERROR + "host compiler - Not found! (clang version must be at least 3.9)") + else() + message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") + endif() +else() + message(WARNING + "host compiler - Not found! (triSYCL supports GCC and Clang)") +endif() + +#triSYCL options +option(TRISYCL_OPENMP "triSYCL multi-threading with OpenMP" ON) +option(TRISYCL_OPENCL "triSYCL OpenCL interoperability mode" OFF) +option(TRISYCL_NO_ASYNC "triSYCL use synchronous kernel execution" OFF) +option(TRISYCL_DEBUG "triSCYL use debug mode" OFF) +option(TRISYCL_DEBUG_STRUCTORS "triSYCL trace of object lifetimes" OFF) +option(TRISYCL_TRACE_KERNEL "triSYCL trace of kernel execution" OFF) + +mark_as_advanced(TRISYCL_OPENMP) +mark_as_advanced(TRISYCL_OPENCL) +mark_as_advanced(TRISYCL_NO_ASYNC) +mark_as_advanced(TRISYCL_DEBUG) +mark_as_advanced(TRISYCL_DEBUG_STRUCTORS) +mark_as_advanced(TRISYCL_TRACE_KERNEL) + +#triSYCL definitions +set(CL_SYCL_LANGUAGE_VERSION 220 CACHE STRING + "Host language version to be used by trisYCL (default is: 220)") +set(TRISYCL_CL_LANGUAGE_VERSION 220 CACHE STRING + "Device language version to be used by trisYCL (default is: 220)") +# triSYCL now requires c++17 +set(CMAKE_CXX_STANDARD 17) +set(CXX_STANDARD_REQUIRED ON) + + +# Find OpenCL package +include(CMakeFindDependencyMacro) +if(TRISYCL_OPENCL) + find_dependency(OpenCL REQUIRED) + if(UNIX) + set(BOOST_COMPUTE_INCPATH /usr/include/compute CACHE PATH + "Path to Boost.Compute headers (default is: /usr/include/compute)") + endif() +endif() + +# Find OpenMP package +if(TRISYCL_OPENMP) + find_dependency(OpenMP REQUIRED) +endif() + +# Find Boost +find_dependency(Boost 1.58 REQUIRED COMPONENTS chrono log) + +# If debug or trace we need boost log +if(TRISYCL_DEBUG OR TRISYCL_DEBUG_STRUCTORS OR TRISYCL_TRACE_KERNEL) + set(LOG_NEEDED ON) +else() + set(LOG_NEEDED OFF) +endif() + +find_dependency(Threads REQUIRED) + +# Find triSYCL directory +if (TRISYCL_INCLUDES AND TRISYCL_LIBRARIES) + set(TRISYCL_FIND_QUIETLY TRUE) +endif () + +find_path(TRISYCL_INCLUDE_DIR + NAMES sycl.hpp + PATHS $ENV{TRISYCLDIR} $ENV{TRISYCLDIR}/include ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES triSYCL +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(TriSYCL DEFAULT_MSG + TRISYCL_INCLUDE_DIR) + +if(NOT TRISYCL_INCLUDE_DIR) + message(FATAL_ERROR + "triSYCL include directory - Not found! 
(please set TRISYCL_INCLUDE_DIR") +else() + message(STATUS "triSYCL include directory - Found ${TRISYCL_INCLUDE_DIR}") +endif() + +include(CMakeParseArguments) +####################### +# add_sycl_to_target +####################### +function(add_sycl_to_target) + set(options) + set(one_value_args + TARGET + ) + set(multi_value_args + SOURCES + ) + cmake_parse_arguments(ADD_SYCL_ARGS + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN} + ) + + # Add include directories to the "#include <>" paths + target_include_directories (${ADD_SYCL_ARGS_TARGET} PUBLIC + ${TRISYCL_INCLUDE_DIR} + ${Boost_INCLUDE_DIRS} + $<$:${OpenCL_INCLUDE_DIRS}> + $<$:${BOOST_COMPUTE_INCPATH}>) + + # Link dependencies + target_link_libraries(${ADD_SYCL_ARGS_TARGET} + $<$:${OpenCL_LIBRARIES}> + Threads::Threads + $<$:Boost::log> + Boost::chrono) + + # Compile definitions + target_compile_definitions(${ADD_SYCL_ARGS_TARGET} PUBLIC + EIGEN_SYCL_TRISYCL + $<$:TRISYCL_NO_ASYNC> + $<$:TRISYCL_OPENCL> + $<$:TRISYCL_DEBUG> + $<$:TRISYCL_DEBUG_STRUCTORS> + $<$:TRISYCL_TRACE_KERNEL> + $<$:BOOST_LOG_DYN_LINK>) + + # C++ and OpenMP requirements + target_compile_options(${ADD_SYCL_ARGS_TARGET} PUBLIC + ${TRISYCL_COMPILE_OPTIONS} + $<$:${OpenMP_CXX_FLAGS}>) + + if(${TRISYCL_OPENMP} AND (NOT WIN32)) + # Does not support generator expressions + set_target_properties(${ADD_SYCL_ARGS_TARGET} + PROPERTIES + LINK_FLAGS ${OpenMP_CXX_FLAGS}) + endif() + +endfunction() diff --git a/external/eigen-3.4.0/cmake/FindUMFPACK.cmake b/external/eigen-3.4.0/cmake/FindUMFPACK.cmake new file mode 100644 index 0000000..91cf637 --- /dev/null +++ b/external/eigen-3.4.0/cmake/FindUMFPACK.cmake @@ -0,0 +1,53 @@ +# Umfpack lib usually requires linking to a blas library. +# It is up to the user of this module to find a BLAS and link to it. 
+ +if (UMFPACK_INCLUDES AND UMFPACK_LIBRARIES) + set(UMFPACK_FIND_QUIETLY TRUE) +endif () + +find_path(UMFPACK_INCLUDES + NAMES + umfpack.h + PATHS + $ENV{UMFPACKDIR} + ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES + suitesparse + ufsparse +) + +find_library(UMFPACK_LIBRARIES umfpack PATHS $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + +if(UMFPACK_LIBRARIES) + + if(NOT UMFPACK_LIBDIR) + get_filename_component(UMFPACK_LIBDIR ${UMFPACK_LIBRARIES} PATH) + endif() + + find_library(COLAMD_LIBRARY colamd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(COLAMD_LIBRARY) + set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${COLAMD_LIBRARY}) + endif () + + find_library(AMD_LIBRARY amd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(AMD_LIBRARY) + set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${AMD_LIBRARY}) + endif () + + find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(SUITESPARSE_LIBRARY) + set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${SUITESPARSE_LIBRARY}) + endif () + + find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(CHOLMOD_LIBRARY) + set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${CHOLMOD_LIBRARY}) + endif() + +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(UMFPACK DEFAULT_MSG + UMFPACK_INCLUDES UMFPACK_LIBRARIES) + +mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY CHOLMOD_LIBRARY SUITESPARSE_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/RegexUtils.cmake b/external/eigen-3.4.0/cmake/RegexUtils.cmake new file mode 100644 index 0000000..f0a1524 --- /dev/null +++ b/external/eigen-3.4.0/cmake/RegexUtils.cmake @@ -0,0 +1,19 @@ +function(escape_string_as_regex _str_out _str_in) + string(REGEX REPLACE "\\\\" "\\\\\\\\" FILETEST2 "${_str_in}") + string(REGEX REPLACE "([.$+*?|-])" "\\\\\\1" FILETEST2 "${FILETEST2}") + string(REGEX REPLACE "\\^" "\\\\^" FILETEST2 "${FILETEST2}") + string(REGEX REPLACE "\\(" "\\\\(" FILETEST2 "${FILETEST2}") + string(REGEX REPLACE "\\)" "\\\\)" FILETEST2 "${FILETEST2}") + string(REGEX REPLACE "\\[" "\\\\[" FILETEST2 "${FILETEST2}") + string(REGEX REPLACE "\\]" "\\\\]" FILETEST2 "${FILETEST2}") + set(${_str_out} "${FILETEST2}" PARENT_SCOPE) +endfunction() + +function(test_escape_string_as_regex) + set(test1 "\\.^$-+*()[]?|") + escape_string_as_regex(test2 "${test1}") + set(testRef "\\\\\\.\\^\\$\\-\\+\\*\\(\\)\\[\\]\\?\\|") + if(NOT test2 STREQUAL testRef) + message("Error in the escape_string_for_regex function : \n ${test1} was escaped as ${test2}, should be ${testRef}") + endif() +endfunction() \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/UseEigen3.cmake b/external/eigen-3.4.0/cmake/UseEigen3.cmake new file mode 100644 index 0000000..a38bac8 --- /dev/null +++ b/external/eigen-3.4.0/cmake/UseEigen3.cmake @@ -0,0 +1,6 @@ +# -*- cmake -*- +# +# UseEigen3.cmake + +add_definitions ( ${EIGEN3_DEFINITIONS} ) +include_directories ( ${EIGEN3_INCLUDE_DIRS} ) diff --git a/include/acmaes.hpp b/include/acmaes.hpp new file mode 100644 index 0000000..2cdb15c --- /dev/null +++ b/include/acmaes.hpp @@ -0,0 +1,116 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "evaluator.h" + + +namespace acmaes { + + class AcmaesOptimizer { + + public: + + AcmaesOptimizer(long runid_, Fitness* fitfun_, int popsize_, int mu_, + const vec& guess_, const vec& inputSigma_, int maxEvaluations_, + double accuracy_, double stopfitness_, 
double stopTolHistFun_, + int update_gap_, long seed); + + + ~AcmaesOptimizer(); + // param zmean weighted row matrix of the gaussian random numbers generating the current offspring + // param xold xmean matrix of the previous generation + // return hsig flag indicating a small correction + + bool updateEvolutionPaths(const vec& zmean, const vec& xold); + + // param hsig flag indicating a small correction + // param bestArx fitness-sorted matrix of the argument vectors producing the current offspring + // param arz unsorted matrix containing the gaussian random values of the current offspring + // param arindex indices indicating the fitness-order of the current offspring + // param xold xmean matrix of the previous generation + + double updateCovariance(bool hsig, const mat& bestArx, const mat& arz, + const ivec& arindex, const mat& xold); + + // Update B and diagD from C + // param negccov Negative covariance factor. + + void updateBD(double negccov); + mat ask_all(); + int tell_all(mat ys, mat xs); + int tell_all_asked(mat ys, mat xs); + mat getPopulation(); + vec ask(); + int tell(double y, const vec& x); + void updateCMA(); + int doOptimize(); + int do_optimize_delayed_update(int workers); + vec getBestX(); + double getBestValue(); + double getIterations(); + int getStop(); + Fitness* getFitfun(); + int getDim(); + int getPopsize(); + Fitness* getFitfunPar(); + mat popX; + + int n_updates; + + private: + long runid; + Fitness* fitfun; + vec guess; + double accuracy; + int popsize; // population size + vec inputSigma; + int dim; + int maxEvaluations; + double stopfitness; + double stopTolUpX; + double stopTolX; + double stopTolFun; + double stopTolHistFun; + int mu; // + vec weights; + double mueff; // + double sigma; + double cc; + double cs; + double damps; + double ccov1; + double ccovmu; + double chiN; + double ccov1Sep; + double ccovmuSep; + double lazy_update_gap = 0; + vec xmean; + vec pc; + vec ps; + double normps; + mat B; + mat BD; + mat diagD; + mat C; + vec diagC; + mat arz; + mat arx; + vec fitness; + int iterations = 0; + int last_update = 0; + vec fitnessHistory; + int historySize; + double bestValue; + vec bestX; + int stop; + int told = 0; + pcg64* rs; + bool computeArz; + }; +} diff --git a/include/common.hpp b/include/common.hpp index f6840e7..9077789 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -22,7 +22,7 @@ #include #include -using Float = long double; +using Float = double; using Matrix = Eigen::Matrix; using Vector = Eigen::Matrix; using Array = Eigen::Array; diff --git a/include/evaluator.h b/include/evaluator.h new file mode 100644 index 0000000..ded88ad --- /dev/null +++ b/include/evaluator.h @@ -0,0 +1,508 @@ +/* + * evaluator.hpp + * + * Created on: Jul 12, 2021 + * Author: Dietmar Wolz + */ + +#ifndef EVALUATOR_HPP_ +#define EVALUATOR_HPP_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EIGEN_VECTORIZE_SSE2 +#include "pcg_random.hpp" + +using Clock = std::chrono::steady_clock; +using std::chrono::time_point; +using std::chrono::duration_cast; +using std::chrono::milliseconds; + +template +class blocking_queue { + +private: + size_t _capacity; + std::queue _queue; + std::mutex _mutex; + std::condition_variable _not_full; + std::condition_variable _not_empty; + +public: + inline blocking_queue(size_t capacity) : + _capacity(capacity) { + } + + inline size_t size() { + std::unique_lock lock(_mutex); + return _queue.size(); + } + + //Inserts the specified element into 
this queue, + // waiting if necessary for space to become available. + inline void put(const T& elem) { + { + std::unique_lock lock(_mutex); + while (_queue.size() >= _capacity) + _not_full.wait(lock); + _queue.push(elem); + } + _not_empty.notify_one(); + } + + // Retrieves and removes the head of this queue, + // waiting if necessary until an element becomes available. + inline const T& take() { + std::unique_lock lock(_mutex); + while (_queue.size() == 0) + _not_empty.wait(lock); + T& front = _queue.front(); + _queue.pop(); + _not_full.notify_one(); + return front; + } +}; + +typedef Eigen::Matrix vec; +typedef Eigen::Matrix ivec; +typedef Eigen::Matrix mat; +typedef Eigen::Matrix imat; + +typedef bool (*callback_type)(int, const double*, double*); + +typedef void (*callback_parallel)(int, int, double*, double*); + +static bool noop_callback(int popsize, const double* x, double* y) { + return true; +} + +static void noop_callback_par(int popsize, int dim, double* x, double* y) { +} + +static std::uniform_real_distribution<> distr_01 = +std::uniform_real_distribution<>(0, 1); + +static std::normal_distribution<> gauss_01 = std::normal_distribution<>(0, 1); + +static Eigen::MatrixXd normal(int dx, int dy, pcg64& rs) { + return Eigen::MatrixXd::NullaryExpr(dx, dy, [&]() { + return gauss_01(rs); + }); +} + +static Eigen::MatrixXd normalVec(int dim, pcg64& rs) { + return Eigen::MatrixXd::NullaryExpr(dim, 1, [&]() { + return gauss_01(rs); + }); +} + +static Eigen::MatrixXd uniform(int dx, int dy, pcg64& rs) { + return Eigen::MatrixXd::NullaryExpr(dx, dy, [&]() { + return distr_01(rs); + }); +} + +static Eigen::MatrixXd uniformVec(int dim, pcg64& rs) { + return Eigen::MatrixXd::NullaryExpr(dim, 1, [&]() { + return distr_01(rs); + }); +} + +static double rand01(pcg64& rs) { + return distr_01(rs); +} + +static int randInt(pcg64& rs, int max) { + return (int)(max * distr_01(rs)); +} + +static double normreal(pcg64& rs, double mu, double sdev) { + return gauss_01(rs) * sdev + mu; +} + +static vec normalVec(const vec& mean, const vec& sdev, int dim, pcg64& rs) { + vec nv = normalVec(dim, rs); + return (nv.array() * sdev.array()).matrix() + mean; +} + +static vec zeros(int n) { + return Eigen::MatrixXd::Zero(n, 1); +} + +static mat zeros(int n, int m) { + return Eigen::MatrixXd::Zero(n, m); +} + +static vec constant(int n, double val) { + return Eigen::MatrixXd::Constant(n, 1, val); +} + +static mat constant(int n, int m, double val) { + return Eigen::MatrixXd::Constant(n, m, val); +} + +struct IndexVal { + int index; + double val; +}; + +static bool compareIndexVal(IndexVal i1, IndexVal i2) { + return (i1.val < i2.val); +} + +static ivec sort_index(const vec& x) { + int size = x.size(); + std::vector ivals(size); + for (int i = 0; i < size; i++) { + ivals[i].index = i; + ivals[i].val = x[i]; + } + std::sort(ivals.data(), ivals.data() + size, compareIndexVal); + return Eigen::MatrixXi::NullaryExpr(size, 1, [&ivals](int i) { + return ivals[i].index; + }); +} + +static int index_min(vec& v) { + double minv = DBL_MAX; + int mi = -1; + for (int i = 0; i < v.size(); i++) { + if (v[i] < minv) { + mi = i; + minv = v[i]; + } + } + return mi; +} + +// wrapper around the fitness function, scales according to boundaries + +class Fitness { + +public: + + Fitness(callback_type func, callback_parallel func_par_, int dim, int nobj, + const vec& lower, const vec& upper) : + _func(func), _func_par(func_par_), _dim(dim), _nobj(nobj), _lower( + lower), _upper(upper) { + if (_lower.size() > 0) { // bounds defined + 
_scale = _upper - _lower; + _typx = 0.5 * (_upper + _lower); + } + else { + _scale = constant(dim, 1.0); + _typx = zeros(dim); + } + _evaluationCounter = 0; + _normalize = false; + _terminate = false; + _dim = dim; + } + + bool terminate() { + return _terminate; + } + + vec eval(const vec& X) { + std::vector res(_nobj); + _terminate = _terminate || _func(_dim, X.data(), res.data()); + for (int i = 0; i < _nobj; i++) { + if (std::isnan(res[i]) || !std::isfinite(res[i])) + res[i] = 1E99; + } + _evaluationCounter++; + vec rvec = Eigen::Map(res.data(), _nobj); + return rvec; + } + + vec eval(const double* const p) { + std::vector res(_nobj); + _terminate = _terminate || _func(_dim, p, res.data()); + for (int i = 0; i < _nobj; i++) { + if (std::isnan(res[i]) || !std::isfinite(res[i])) + res[i] = 1E99; + } + _evaluationCounter++; + vec rvec = Eigen::Map(res.data(), _nobj); + return rvec; + } + + vec getClosestFeasible(const vec& X) const { + if (_lower.size() > 0) + return X.cwiseMin(_upper).cwiseMax(_lower); + return X; + } + + double getClosestFeasible_i(int i, double x_i) { + return _lower.size() == 0 ? x_i : std::min(_upper[i], std::max(_lower[i], x_i)); + } + + vec getClosestFeasibleNormed(const vec& X) const { + if (_lower.size() > 0) { + if (_normalize) + return X.cwiseMin(1.0).cwiseMax(-1.0); + else + return X.cwiseMin(_upper).cwiseMax(_lower); + } + return X; + } + + void setClosestFeasible(mat& X) const { + if (_lower.size() > 0) { + for (int i = 0; i < X.cols(); i++) { + if (_normalize) + X.col(i) = X.col(i).cwiseMin(1.0).cwiseMax(-1.0); + else + X.col(i) = X.col(i).cwiseMin(_upper).cwiseMax(_lower); + } + } + } + + vec norm(const vec& X) const { + if (_lower.size() == 0) + std::cout << "no bounds error" << std::endl; + return ((X - _lower).array() / _scale.array()).matrix(); + } + + double norm_i(int i, double x) const { + if (_lower.size() == 0) + std::cout << "no bounds error" << std::endl; + return std::max(0.0, std::min(1.0, (x - _lower[i]) / _scale[i])); // avoid Nan errors + } + + bool feasible(int i, double x) { + return _lower.size() == 0 || (x >= _lower[i] && x <= _upper[i]); + } + + bool hasBounds() { + return _lower.size() != 0; + } + + vec sample(pcg64& rs) { + if (_lower.size() == 0) + std::cout << "no bounds error" << std::endl; + vec rv = uniformVec(_dim, rs); + return (rv.array() * _scale.array()).matrix() + _lower; + } + + vec sample(pcg64& rs, vec& up, vec& lo) { + vec rv = uniformVec(_dim, rs); + return (rv.array() * (up - lo).array()).matrix() + lo; + } + + double sample_i(int i, pcg64& rs) { + if (_lower.size() == 0) + std::cout << "no bounds error" << std::endl; + return _lower[i] + _scale[i] * distr_01(rs); + } + + double sample_i(int i, pcg64& rs, vec& up, vec& lo) { + return lo[i] + (up[i] - lo[i]) * distr_01(rs); + } + + int evaluations() { + return _evaluationCounter; + } + + void resetEvaluations() { + _evaluationCounter = 0; + } + + void incrEvaluations() { + _evaluationCounter++; + } + + vec scale() { + return _scale; + } + + vec typx() { + return _typx; + } + + void setNormalize(bool normalize) { + _normalize = normalize; + } + + void setTerminate() { + _terminate = true; + } + + vec encode(const vec& X) const { + if (_normalize) + return 2 * (X - _typx).array() / _scale.array(); + else + return X; + } + + vec decode(const vec& X) const { + if (_normalize) + return 0.5 * (X.array() * _scale.array()).matrix() + _typx; + else + return X; + } + + void values(const mat& popX, vec& ys) { + int popsize = popX.cols(); + int n = popX.rows(); + std::vector 
pargs(popsize * n); + std::vector res(popsize); + for (int p = 0; p < popsize; p++) { + vec x = getClosestFeasible(decode(popX.col(p))); + for (int i = 0; i < n; i++) + pargs[p * n + i] = x(i); + } + _func_par(popsize, n, pargs.data(), res.data()); + for (int p = 0; p < popX.cols(); p++) + ys[p] = res[p]; + _evaluationCounter += popsize; + } + + vec violations(const mat& X, double penalty_coef) { + vec violations = zeros(X.cols()); + if (_lower.size() > 0) { + for (int i = 0; i < X.cols(); i++) { + vec x = decode(X.col(i)); + violations[i] = penalty_coef + * ((_lower - x).cwiseMax(0).sum() + + (x - _upper).cwiseMax(0).sum()); + } + } + return violations; + } + + void getMinValues(double* const p) const { + for (int i = 0; i < _lower.size(); i++) + p[i] = _lower[i]; + } + + void getMaxValues(double* const p) const { + for (int i = 0; i < _upper.size(); i++) + p[i] = _upper[i]; + } + +private: + callback_type _func; + callback_parallel _func_par; + int _dim; + int _nobj; + vec _lower; + vec _upper; + vec _scale; + vec _typx; + bool _normalize; + bool _terminate; + long _evaluationCounter; +}; + +struct vec_id { +public: + + vec_id(const vec& v, int id) : + _id(id), _v(v) { + } + + int _id; + vec _v; +}; + +class evaluator { +public: + + evaluator(Fitness* fit, int nobj, int workers) : + _fit(fit), _nobj(nobj), _workers(workers), _stop(false) { + _requests = new blocking_queue(2 * workers); + _evaled = new blocking_queue(2 * workers); + _t0 = Clock::now(); + if (_workers <= 0) + _workers = std::thread::hardware_concurrency(); + for (int thread_id = 0; thread_id < _workers; thread_id++) { + _jobs.push_back(evaluator_job(thread_id, this)); + } + } + + ~evaluator() { + join(); + delete _requests; + delete _evaled; + } + + void evaluate(vec& x, int id) { + _requests->put(new vec_id(x, id)); + } + + // needs to be deleted + vec_id* result() { + return _evaled->take(); + } + + void execute(int thread_id) { + while (!_stop) { + vec_id* vid = _requests->take(); + if (!_stop) { + try { + vec x = _fit->getClosestFeasible(_fit->decode(vid->_v)); + vid->_v = _fit->eval(x); + } + catch (std::exception& e) { + std::cout << e.what() << std::endl; + vid->_v = constant(_nobj, DBL_MAX); + } + _evaled->put(vid); + } + else + delete vid; + } + } + + void join() { + _stop = true; + vec x(0); + // to release all locks + for (auto& job : _jobs) { + _requests->put(new vec_id(x, 0)); + } + for (auto& job : _jobs) { + job.join(); + } + } + +private: + + class evaluator_job { + + public: + evaluator_job(int id, evaluator* exec) { + _thread = std::thread(&evaluator::execute, exec, id); + } + + void join() { + if (_thread.joinable()) + _thread.join(); + } + + private: + std::thread _thread; + }; + + Fitness* _fit; + int _nobj; + int _workers; + bool _stop; + blocking_queue* _requests; + blocking_queue* _evaled; + std::vector _jobs; + time_point _t0; +}; + +#endif /* EVALUATOR_HPP_ */ \ No newline at end of file diff --git a/include/pcg_extras.hpp b/include/pcg_extras.hpp new file mode 100644 index 0000000..6467d15 --- /dev/null +++ b/include/pcg_extras.hpp @@ -0,0 +1,667 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2017 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. 
This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + + /* + * This file provides support code that is useful for random-number generation + * but not specific to the PCG generation scheme, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#ifndef PCG_EXTRAS_HPP_INCLUDED +#define PCG_EXTRAS_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUC__ +#include +#endif + + /* + * Abstractions for compiler-specific directives + */ + +#ifdef __GNUC__ +#define PCG_NOINLINE __attribute__((noinline)) +#else +#define PCG_NOINLINE +#endif + + /* + * Some members of the PCG library use 128-bit math. When compiling on 64-bit + * platforms, both GCC and Clang provide 128-bit integer types that are ideal + * for the job. + * + * On 32-bit platforms (or with other compilers), we fall back to a C++ + * class that provides 128-bit unsigned integers instead. It may seem + * like we're reinventing the wheel here, because libraries already exist + * that support large integers, but most existing libraries provide a very + * generic multiprecision code, but here we're operating at a fixed size. + * Also, most other libraries are fairly heavyweight. So we use a direct + * implementation. Sadly, it's much slower than hand-coded assembly or + * direct CPU support. + * + */ +#if __SIZEOF_INT128__ && !PCG_FORCE_EMULATED_128BIT_MATH +namespace pcg_extras { + typedef __uint128_t pcg128_t; +} +#define PCG_128BIT_CONSTANT(high,low) \ + ((pcg_extras::pcg128_t(high) << 64) + low) +#else +#include "pcg_uint128.hpp" +namespace pcg_extras { + typedef pcg_extras::uint_x4 pcg128_t; +} +#define PCG_128BIT_CONSTANT(high,low) \ + pcg_extras::pcg128_t(high,low) +#define PCG_EMULATED_128BIT_MATH 1 +#endif + + +namespace pcg_extras { + + /* + * We often need to represent a "number of bits". When used normally, these + * numbers are never greater than 128, so an unsigned char is plenty. + * If you're using a nonstandard generator of a larger size, you can set + * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers + * might produce faster code if you set it to an unsigned int.) + */ + +#ifndef PCG_BITCOUNT_T + typedef uint8_t bitcount_t; +#else + typedef PCG_BITCOUNT_T bitcount_t; +#endif + + /* + * C++ requires us to be able to serialize RNG state by printing or reading + * it from a stream. Because we use 128-bit ints, we also need to be able + * ot print them, so here is code to do so. + * + * This code provides enough functionality to print 128-bit ints in decimal + * and zero-padded in hex. It's not a full-featured implementation. 
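A usage sketch for the stream operators defined below (assuming pcg_extras.hpp as added by this patch; the value 2^64 and the variable names are illustrative only):

    #include <sstream>
    #include "pcg_extras.hpp"

    int main()
    {
        using pcg_extras::pcg128_t;
        using pcg_extras::operator<<;   // same pattern pcg_random.hpp uses
        using pcg_extras::operator>>;

        pcg128_t big = PCG_128BIT_CONSTANT(1ULL, 0ULL);   // 2^64
        std::ostringstream out;
        out << big;                     // prints 18446744073709551616

        pcg128_t back = PCG_128BIT_CONSTANT(0ULL, 0ULL);
        std::istringstream in(out.str());
        in >> back;                     // reads the value back in decimal
        return back == big ? 0 : 1;
    }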
+ */ + + template + std::basic_ostream& + operator<<(std::basic_ostream& out, pcg128_t value) + { + auto desired_base = out.flags() & out.basefield; + bool want_hex = desired_base == out.hex; + + if (want_hex) { + uint64_t highpart = uint64_t(value >> 64); + uint64_t lowpart = uint64_t(value); + auto desired_width = out.width(); + if (desired_width > 16) { + out.width(desired_width - 16); + } + if (highpart != 0 || desired_width > 16) + out << highpart; + CharT oldfill = '\0'; + if (highpart != 0) { + out.width(16); + oldfill = out.fill('0'); + } + auto oldflags = out.setf(decltype(desired_base){}, out.showbase); + out << lowpart; + out.setf(oldflags); + if (highpart != 0) { + out.fill(oldfill); + } + return out; + } + constexpr size_t MAX_CHARS_128BIT = 40; + + char buffer[MAX_CHARS_128BIT]; + char* pos = buffer + sizeof(buffer); + *(--pos) = '\0'; + constexpr auto BASE = pcg128_t(10ULL); + do { + auto div = value / BASE; + auto mod = uint32_t(value - (div * BASE)); + *(--pos) = '0' + char(mod); + value = div; + } while (value != pcg128_t(0ULL)); + return out << pos; + } + + template + std::basic_istream& + operator>>(std::basic_istream& in, pcg128_t& value) + { + typename std::basic_istream::sentry s(in); + + if (!s) + return in; + + constexpr auto BASE = pcg128_t(10ULL); + pcg128_t current(0ULL); + bool did_nothing = true; + bool overflow = false; + for (;;) { + CharT wide_ch = in.get(); + if (!in.good()) { + in.clear(std::ios::eofbit); + break; + } + auto ch = in.narrow(wide_ch, '\0'); + if (ch < '0' || ch > '9') { + in.unget(); + break; + } + did_nothing = false; + pcg128_t digit(uint32_t(ch - '0')); + pcg128_t timesbase = current * BASE; + overflow = overflow || timesbase < current; + current = timesbase + digit; + overflow = overflow || current < digit; + } + + if (did_nothing || overflow) { + in.setstate(std::ios::failbit); + if (overflow) + current = ~pcg128_t(0ULL); + } + + value = current; + + return in; + } + + /* + * Likewise, if people use tiny rngs, we'll be serializing uint8_t. + * If we just used the provided IO operators, they'd read/write chars, + * not ints, so we need to define our own. We *can* redefine this operator + * here because we're in our own namespace. + */ + + template + std::basic_ostream& + operator<<(std::basic_ostream& out, uint8_t value) + { + return out << uint32_t(value); + } + + template + std::basic_istream& + operator>>(std::basic_istream& in, uint8_t& target) + { + uint32_t value = 0xdecea5edU; + in >> value; + if (!in && value == 0xdecea5edU) + return in; + if (value > uint8_t(~0)) { + in.setstate(std::ios::failbit); + value = ~0U; + } + target = uint8_t(value); + return in; + } + + /* Unfortunately, the above functions don't get found in preference to the + * built in ones, so we create some more specific overloads that will. + * Ugh. + */ + + inline std::ostream& operator<<(std::ostream& out, uint8_t value) + { + return pcg_extras::operator<< (out, value); + } + + inline std::istream& operator>>(std::istream& in, uint8_t& value) + { + return pcg_extras::operator>> (in, value); + } + + + + /* + * Useful bitwise operations. + */ + + /* + * XorShifts are invertable, but they are someting of a pain to invert. + * This function backs them out. It's used by the whacky "inside out" + * generator defined later. 
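A quick way to see the inversion at work, using the unxorshift template defined just below (the shift amount 17 and the input value are arbitrary):

    #include <cassert>
    #include <cstdint>
    #include "pcg_extras.hpp"

    int main()
    {
        uint64_t x = 0x0123456789abcdefULL;
        uint64_t y = x ^ (x >> 17);                      // forward xorshift
        uint64_t z = pcg_extras::unxorshift(y, 64, 17);  // backs it out
        assert(z == x);
        return 0;
    }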
+ */ + + template + inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) + { + if (2 * shift >= bits) { + return x ^ (x >> shift); + } + itype lowmask1 = (itype(1U) << (bits - shift * 2)) - 1; + itype highmask1 = ~lowmask1; + itype top1 = x; + itype bottom1 = x & lowmask1; + top1 ^= top1 >> shift; + top1 &= highmask1; + x = top1 | bottom1; + itype lowmask2 = (itype(1U) << (bits - shift)) - 1; + itype bottom2 = x & lowmask2; + bottom2 = unxorshift(bottom2, bits - shift, shift); + bottom2 &= lowmask1; + return top1 | bottom2; + } + + /* + * Rotate left and right. + * + * In ideal world, compilers would spot idiomatic rotate code and convert it + * to a rotate instruction. Of course, opinions vary on what the correct + * idiom is and how to spot it. For clang, sometimes it generates better + * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. + */ + + template + inline itype rotl(itype value, bitcount_t rot) + { + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value << rot) | (value >> (bits - rot)) : value; +#else + return (value << rot) | (value >> ((-rot) & mask)); +#endif + } + + template + inline itype rotr(itype value, bitcount_t rot) + { + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value >> rot) | (value << (bits - rot)) : value; +#else + return (value >> rot) | (value << ((-rot) & mask)); +#endif + } + + /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes + * to properly recognizing idiomatic rotate code, so for we also provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + * (I hope that these compilers get better so that this code can die.) + * + * These overloads will be preferred over the general template code above. + */ +#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) + + inline uint8_t rotr(uint8_t value, bitcount_t rot) + { + asm("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; + } + + inline uint16_t rotr(uint16_t value, bitcount_t rot) + { + asm("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; + } + + inline uint32_t rotr(uint32_t value, bitcount_t rot) + { + asm("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; + } + +#if __x86_64__ + inline uint64_t rotr(uint64_t value, bitcount_t rot) + { + asm("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; + } +#endif // __x86_64__ + +#elif defined(_MSC_VER) + // Use MSVC++ bit rotation intrinsics + +#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16) + + inline uint8_t rotr(uint8_t value, bitcount_t rot) + { + return _rotr8(value, rot); + } + + inline uint16_t rotr(uint16_t value, bitcount_t rot) + { + return _rotr16(value, rot); + } + + inline uint32_t rotr(uint32_t value, bitcount_t rot) + { + return _rotr(value, rot); + } + + inline uint64_t rotr(uint64_t value, bitcount_t rot) + { + return _rotr64(value, rot); + } + +#endif // PCG_USE_INLINE_ASM + + + /* + * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of + * 32-bit integers with seed data, but sometimes we want to produce + * larger or smaller integers. + * + * The following code handles this annoyance. + * + * uneven_copy will copy an array of 32-bit ints to an array of larger or + * smaller ints (actually, the code is general it only needing forward + * iterators). 
The copy is identical to the one that would be performed if + * we just did memcpy on a standard little-endian machine, but works + * regardless of the endian of the machine (or the weirdness of the ints + * involved). + * + * generate_to initializes an array of integers using a SeedSeq + * object. It is given the size as a static constant at compile time and + * tries to avoid memory allocation. If we're filling in 32-bit constants + * we just do it directly. If we need a separate buffer and it's small, + * we allocate it on the stack. Otherwise, we fall back to heap allocation. + * Ugh. + * + * generate_one produces a single value of some integral type using a + * SeedSeq object. + */ + + /* uneven_copy helper, case where destination ints are less than 32 bit. */ + + template + SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::true_type) + { + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bitcount_t SRC_SIZE = sizeof(src_t); + constexpr bitcount_t DEST_SIZE = sizeof(dest_t); + constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; + constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; + + size_t count = 0; + src_t value = 0; + + while (dest_first != dest_last) { + if ((count++ % SCALE) == 0) + value = *src_first++; // Get more bits + else + value >>= DEST_BITS; // Move down bits + + *dest_first++ = dest_t(value); // Truncates, ignores high bits. + } + return src_first; + } + + /* uneven_copy helper, case where destination ints are more than 32 bit. */ + + template + SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::false_type) + { + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr auto SRC_SIZE = sizeof(src_t); + constexpr auto SRC_BITS = SRC_SIZE * 8; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto SCALE = (DEST_SIZE + SRC_SIZE - 1) / SRC_SIZE; + + while (dest_first != dest_last) { + dest_t value(0UL); + unsigned int shift = 0; + + for (size_t i = 0; i < SCALE; ++i) { + value |= dest_t(*src_first++) << shift; + shift += SRC_BITS; + } + + *dest_first++ = value; + } + return src_first; + } + + /* uneven_copy, call the right code for larger vs. smaller */ + + template + inline SrcIter uneven_copy(SrcIter src_first, + DestIter dest_first, DestIter dest_last) + { + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); + + return uneven_copy_impl(src_first, dest_first, dest_last, + std::integral_constant{}); + } + + /* generate_to, fill in a fixed-size array of integral type using a SeedSeq + * (actually works for any random-access iterator) + */ + + template + inline void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::true_type) + { + generator.generate(dest, dest + size); + } + + template + void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::false_type) + { + typedef typename std::iterator_traits::value_type dest_t; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto GEN_SIZE = sizeof(uint32_t); + + constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; + constexpr size_t FROM_ELEMS = + GEN_IS_SMALLER + ? 
size * ((DEST_SIZE + GEN_SIZE - 1) / GEN_SIZE) + : (size + (GEN_SIZE / DEST_SIZE) - 1) + / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); + // this odd code ^^^^^^^^^^^^^^^^^ is work-around for + // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 + + if (FROM_ELEMS <= 1024) { + uint32_t buffer[FROM_ELEMS]; + generator.generate(buffer, buffer + FROM_ELEMS); + uneven_copy(buffer, dest, dest + size); + } + else { + uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); + generator.generate(buffer, buffer + FROM_ELEMS); + uneven_copy(buffer, dest, dest + size); + free(static_cast(buffer)); + } + } + + template + inline void generate_to(SeedSeq&& generator, DestIter dest) + { + typedef typename std::iterator_traits::value_type dest_t; + constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); + + generate_to_impl(std::forward(generator), dest, + std::integral_constant{}); + } + + /* generate_one, produce a value of integral type using a SeedSeq + * (optionally, we can have it produce more than one and pick which one + * we want) + */ + + template + inline UInt generate_one(SeedSeq&& generator) + { + UInt result[N]; + generate_to(std::forward(generator), result); + return result[i]; + } + + template + auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) + -> typename RngType::result_type + { + typedef typename RngType::result_type rtype; + rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) + % upper_bound; + for (;;) { + rtype r = rng() - RngType::min(); + if (r >= threshold) + return r % upper_bound; + } + } + + template + void shuffle(Iter from, Iter to, RandType&& rng) + { + typedef typename std::iterator_traits::difference_type delta_t; + typedef typename std::remove_reference::type::result_type result_t; + auto count = to - from; + while (count > 1) { + delta_t chosen = delta_t(bounded_rand(rng, result_t(count))); + --count; + --to; + using std::swap; + swap(*(from + chosen), *to); + } + } + + /* + * Although std::seed_seq is useful, it isn't everything. Often we want to + * initialize a random-number generator some other way, such as from a random + * device. + * + * Technically, it does not meet the requirements of a SeedSequence because + * it lacks some of the rarely-used member functions (some of which would + * be impossible to provide). However the C++ standard is quite specific + * that actual engines only called the generate method, so it ought not to be + * a problem in practice. + */ + + template + class seed_seq_from { + private: + RngType rng_; + + typedef uint_least32_t result_type; + + public: + template + seed_seq_from(Args&&... args) : + rng_(std::forward(args)...) + { + // Nothing (else) to do... + } + + template + void generate(Iter start, Iter finish) + { + for (auto i = start; i != finish; ++i) + *i = result_type(rng_()); + } + + constexpr size_t size() const + { + return (sizeof(typename RngType::result_type) > sizeof(result_type) + && RngType::max() > ~size_t(0UL)) + ? ~size_t(0UL) + : size_t(RngType::max()); + } + }; + + /* + * Sometimes you might want a distinct seed based on when the program + * was compiled. That way, a particular instance of the program will + * behave the same way, but when recompiled it'll produce a different + * value. + */ + + template + struct static_arbitrary_seed { + private: + static constexpr IntType fnv(IntType hash, const char* pos) { + return *pos == '\0' + ? 
hash + : fnv((hash * IntType(16777619U)) ^ *pos, (pos + 1)); + } + + public: + static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), + __DATE__ __TIME__ __FILE__); + }; + + // Sometimes, when debugging or testing, it's handy to be able print the name + // of a (in human-readable form). This code allows the idiom: + // + // cout << printable_typename() + // + // to print out my_foo_type_t (or its concrete type if it is a synonym) + +#if __cpp_rtti || __GXX_RTTI + + template + struct printable_typename {}; + + template + std::ostream& operator<<(std::ostream& out, printable_typename) { + const char* implementation_typename = typeid(T).name(); +#ifdef __GNUC__ + int status; + char* pretty_name = + abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status); + if (status == 0) + out << pretty_name; + free(static_cast(pretty_name)); + if (status == 0) + return out; +#endif + out << implementation_typename; + return out; + } + +#endif // __cpp_rtti || __GXX_RTTI + +} // namespace pcg_extras + +#endif // PCG_EXTRAS_HPP_INCLUDED \ No newline at end of file diff --git a/include/pcg_random.hpp b/include/pcg_random.hpp new file mode 100644 index 0000000..944458a --- /dev/null +++ b/include/pcg_random.hpp @@ -0,0 +1,1958 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2022 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + + /* + * This code provides the reference implementation of the PCG family of + * random number generators. 
The code is complex because it implements + * + * - several members of the PCG family, specifically members corresponding + * to the output functions: + * - XSH RR (good for 64-bit state, 32-bit output) + * - XSH RS (good for 64-bit state, 32-bit output) + * - XSL RR (good for 128-bit state, 64-bit output) + * - RXS M XS (statistically most powerful generator) + * - XSL RR RR (good for 128-bit state, 128-bit output) + * - and RXS, RXS M, XSH, XSL (mostly for testing) + * - at potentially *arbitrary* bit sizes + * - with four different techniques for random streams (MCG, one-stream + * LCG, settable-stream LCG, unique-stream LCG) + * - and the extended generation schemes allowing arbitrary periods + * - with all features of C++11 random number generation (and more), + * some of which are somewhat painful, including + * - initializing with a SeedSequence which writes 32-bit values + * to memory, even though the state of the generator may not + * use 32-bit values (it might use smaller or larger integers) + * - I/O for RNGs and a prescribed format, which needs to handle + * the issue that 8-bit and 128-bit integers don't have working + * I/O routines (e.g., normally 8-bit = char, not integer) + * - equality and inequality for RNGs + * - and a number of convenience typedefs to mask all the complexity + * + * The code employees a fairly heavy level of abstraction, and has to deal + * with various C++ minutia. If you're looking to learn about how the PCG + * scheme works, you're probably best of starting with one of the other + * codebases (see www.pcg-random.org). But if you're curious about the + * constants for the various output functions used in those other, simpler, + * codebases, this code shows how they are calculated. + * + * On the positive side, at least there are convenience typedefs so that you + * can say + * + * pcg32 myRNG; + * + * rather than: + * + * pcg_detail::engine< + * uint32_t, // Output Type + * uint64_t, // State Type + * pcg_detail::xsh_rr_mixin, true, // Output Func + * pcg_detail::specific_stream, // Stream Kind + * pcg_detail::default_multiplier // LCG Mult + * > myRNG; + * + */ + +#ifndef PCG_RAND_HPP_INCLUDED +#define PCG_RAND_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable:4146) +#endif + +#ifdef _MSC_VER +#define PCG_ALWAYS_INLINE __forceinline +#elif __GNUC__ +#define PCG_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define PCG_ALWAYS_INLINE inline +#endif + + /* + * The pcg_extras namespace contains some support code that is likely to + * be useful for a variety of RNGs, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#include "pcg_extras.hpp" + +namespace pcg_detail { + + using namespace pcg_extras; + + /* + * The LCG generators need some constants to function. This code lets you + * look up the constant by *type*. For example + * + * default_multiplier::multiplier() + * + * gives you the default multiplier for 32-bit integers. We use the name + * of the constant and not a generic word like value to allow these classes + * to be used as mixins. 
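For example, with the 64-bit constants defined just below, the lookup by state type reads as follows (a sketch, not part of the patch):

    #include <cstdint>
    #include <iostream>
    #include "pcg_random.hpp"

    int main()
    {
        // 64-bit LCG constants, as declared by PCG_DEFINE_CONSTANT below.
        std::cout << pcg_detail::default_multiplier<uint64_t>::multiplier()  // 6364136223846793005
                  << "\n"
                  << pcg_detail::default_increment<uint64_t>::increment()    // 1442695040888963407
                  << "\n";
        return 0;
    }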
+ */ + + template + struct default_multiplier { + // Not defined for an arbitrary type + }; + + template + struct default_increment { + // Not defined for an arbitrary type + }; + +#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ + template <> \ + struct what ## _ ## kind { \ + static constexpr type kind() { \ + return constant; \ + } \ + }; + + PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) + PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) + + PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) + PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) + + PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) + PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) + + PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) + PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) + + PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, + PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL)) + PCG_DEFINE_CONSTANT(pcg128_t, default, increment, + PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL)) + + /* Alternative (cheaper) multipliers for 128-bit */ + + template + struct cheap_multiplier : public default_multiplier { + // For most types just use the default. + }; + + template <> + struct cheap_multiplier { + static constexpr uint64_t multiplier() { + return 0xda942042e4dd58b5ULL; + } + }; + + + /* + * Each PCG generator is available in four variants, based on how it applies + * the additive constant for its underlying LCG; the variations are: + * + * single stream - all instances use the same fixed constant, thus + * the RNG always somewhere in same sequence + * mcg - adds zero, resulting in a single stream and reduced + * period + * specific stream - the constant can be changed at any time, selecting + * a different random sequence + * unique stream - the constant is based on the memory address of the + * object, thus every RNG has its own unique sequence + * + * This variation is provided though mixin classes which define a function + * value called increment() that returns the necessary additive constant. + */ + + + + /* + * unique stream + */ + + + template + class unique_stream { + protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + + public: + typedef itype state_type; + + constexpr itype increment() const { + return itype(reinterpret_cast(this) | 1); + } + + constexpr itype stream() const + { + return increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) + : sizeof(size_t)) * 8 - 1u; + } + + protected: + constexpr unique_stream() = default; + }; + + + /* + * no stream (mcg) + */ + + template + class no_stream { + protected: + static constexpr bool is_mcg = true; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) 
+ { + abort(); + } + + public: + typedef itype state_type; + + static constexpr itype increment() { + return 0; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + + protected: + constexpr no_stream() = default; + }; + + + /* + * single stream/sequence (oneseq) + */ + + template + class oneseq_stream : public default_increment { + protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + + public: + typedef itype state_type; + + static constexpr itype stream() + { + return default_increment::increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + + protected: + constexpr oneseq_stream() = default; + }; + + + /* + * specific stream + */ + + template + class specific_stream { + protected: + static constexpr bool is_mcg = false; + + itype inc_ = default_increment::increment(); + + public: + typedef itype state_type; + typedef itype stream_state; + + constexpr itype increment() const { + return inc_; + } + + itype stream() + { + return inc_ >> 1; + } + + void set_stream(itype specific_seq) + { + inc_ = (specific_seq << 1) | 1; + } + + static constexpr bool can_specify_stream = true; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype) * 8) - 1u; + } + + protected: + specific_stream() = default; + + specific_stream(itype specific_seq) + : inc_(itype(specific_seq << 1) | itype(1U)) + { + // Nothing (else) to do. + } + }; + + + /* + * This is where it all comes together. This function joins together three + * mixin classes which define + * - the LCG additive constant (the stream) + * - the LCG multiplier + * - the output function + * in addition, we specify the type of the LCG state, and the result type, + * and whether to use the pre-advance version of the state for the output + * (increasing instruction-level parallelism) or the post-advance version + * (reducing register pressure). + * + * Given the high level of parameterization, the code has to use some + * template-metaprogramming tricks to handle some of the subtle variations + * involved. + */ + + template , + typename multiplier_mixin = default_multiplier > + class engine : protected output_mixin, + public stream_mixin, + protected multiplier_mixin { + protected: + itype state_; + + struct can_specify_stream_tag {}; + struct no_specifiable_stream_tag {}; + + using stream_mixin::increment; + using multiplier_mixin::multiplier; + + public: + typedef xtype result_type; + typedef itype state_type; + + static constexpr size_t period_pow2() + { + return sizeof(state_type) * 8 - 2 * stream_mixin::is_mcg; + } + + // It would be nice to use std::numeric_limits for these, but + // we can't be sure that it'd be defined for the 128-bit types. 
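For the standard-width result types the bit trick used by max() below agrees with numeric_limits; only the 128-bit case is in doubt. A sketch of that check:

    #include <cstdint>
    #include <limits>

    static_assert(uint32_t(~uint32_t(0UL)) == std::numeric_limits<uint32_t>::max(),
                  "max() bit trick matches numeric_limits for 32-bit results");
    static_assert(uint64_t(~uint64_t(0UL)) == std::numeric_limits<uint64_t>::max(),
                  "max() bit trick matches numeric_limits for 64-bit results");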
+ + static constexpr result_type min() + { + return result_type(0UL); + } + + static constexpr result_type max() + { + return result_type(~result_type(0UL)); + } + + protected: + itype bump(itype state) + { + return state * multiplier() + increment(); + } + + itype base_generate() + { + return state_ = bump(state_); + } + + itype base_generate0() + { + itype old_state = state_; + state_ = bump(state_); + return old_state; + } + + public: + result_type operator()() + { + if (output_previous) + return this->output(base_generate0()); + else + return this->output(base_generate()); + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + + protected: + static itype advance(itype state, itype delta, + itype cur_mult, itype cur_plus); + + static itype distance(itype cur_state, itype newstate, itype cur_mult, + itype cur_plus, itype mask = ~itype(0U)); + + itype distance(itype newstate, itype mask = itype(~itype(0U))) const + { + return distance(state_, newstate, multiplier(), increment(), mask); + } + + public: + void advance(itype delta) + { + state_ = advance(state_, delta, this->multiplier(), this->increment()); + } + + void backstep(itype delta) + { + advance(-delta); + } + + void discard(itype delta) + { + advance(delta); + } + + bool wrapped() + { + if (stream_mixin::is_mcg) { + // For MCGs, the low order two bits never change. In this + // implementation, we keep them fixed at 3 to make this test + // easier. + return state_ == 3; + } + else { + return state_ == 0; + } + } + + engine(itype state = itype(0xcafef00dd15ea5e5ULL)) + : state_(this->is_mcg ? state | state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + engine(itype state, typename sm::stream_state stream_seed) + : stream_mixin(stream_seed), + state_(this->is_mcg ? state | state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + !stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + no_specifiable_stream_tag>::type = {}) + : engine(generate_one(std::forward(seedSeq))) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + can_specify_stream_tag>::type = {}) + { + itype seeddata[2]; + generate_to<2>(std::forward(seedSeq), seeddata); + seed(seeddata[1], seeddata[0]); + } + + + template + void seed(Args&&... 
args) + { + new (this) engine(std::forward(args)...); + } + + template + friend bool operator==(const engine&, + const engine&); + + template + friend itype1 operator-(const engine&, + const engine&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const engine&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + engine& rng); + }; + + template + std::basic_ostream& + operator<<(std::basic_ostream& out, + const engine& rng) + { + using pcg_extras::operator<<; + + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; + } + + + template + std::basic_istream& + operator>>(std::basic_istream& in, + engine& rng) + { + using pcg_extras::operator>>; + + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + itype multiplier, increment, state; + in >> multiplier >> increment >> state; + + if (!in.fail()) { + bool good = true; + if (multiplier != rng.multiplier()) { + good = false; + } + else if (rng.can_specify_stream) { + rng.set_stream(increment >> 1); + } + else if (increment != rng.increment()) { + good = false; + } + if (good) { + rng.state_ = state; + } + else { + in.clear(std::ios::failbit); + } + } + + in.flags(orig_flags); + return in; + } + + + template + itype engine::advance( + itype state, itype delta, itype cur_mult, itype cur_plus) + { + // The method used here is based on Brown, "Random Number Generation + // with Arbitrary Stride,", Transactions of the American Nuclear + // Society (Nov. 1994). The algorithm is very similar to fast + // exponentiation. + // + // Even though delta is an unsigned integer, we can pass a + // signed integer to go backwards, it just goes "the long way round". + + constexpr itype ZERO = 0u; // itype may be a non-trivial types, so + constexpr itype ONE = 1u; // we define some ugly constants. + itype acc_mult = 1; + itype acc_plus = 0; + while (delta > ZERO) { + if (delta & ONE) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + ONE) * cur_plus; + cur_mult *= cur_mult; + delta >>= 1; + } + return acc_mult * state + acc_plus; + } + + template + itype engine::distance( + itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) + { + constexpr itype ONE = 1u; // itype could be weird, so use constant + bool is_mcg = cur_plus == itype(0); + itype the_bit = is_mcg ? itype(4u) : itype(1u); + itype distance = 0u; + while ((cur_state & mask) != (newstate & mask)) { + if ((cur_state & the_bit) != (newstate & the_bit)) { + cur_state = cur_state * cur_mult + cur_plus; + distance |= the_bit; + } + assert((cur_state & the_bit) == (newstate & the_bit)); + the_bit <<= 1; + cur_plus = (cur_mult + ONE) * cur_plus; + cur_mult *= cur_mult; + } + return is_mcg ? 
distance >> 2 : distance; + } + + template + itype operator-(const engine& lhs, + const engine& rhs) + { + static_assert( + std::is_same::value && + std::is_same::value, + "Incomparable generators"); + if (lhs.increment() == rhs.increment()) { + return rhs.distance(lhs.state_); + } + else { + constexpr itype ONE = 1u; + itype lhs_diff = lhs.increment() + (lhs.multiplier() - ONE) * lhs.state_; + itype rhs_diff = rhs.increment() + (rhs.multiplier() - ONE) * rhs.state_; + if ((lhs_diff & itype(3u)) != (rhs_diff & itype(3u))) { + rhs_diff = -rhs_diff; + } + return rhs.distance(rhs_diff, lhs_diff, rhs.multiplier(), itype(0u)); + } + } + + + template + bool operator==(const engine& lhs, + const engine& rhs) + { + return (lhs.multiplier() == rhs.multiplier()) + && (lhs.increment() == rhs.increment()) + && (lhs.state_ == rhs.state_); + } + + template + inline bool operator!=(const engine& lhs, + const engine& rhs) + { + return !operator==(lhs, rhs); + } + + + template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> + using oneseq_base = engine, output_previous, + oneseq_stream, + multiplier_mixin >; + + template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> + using unique_base = engine, output_previous, + unique_stream, + multiplier_mixin >; + + template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> + using setseq_base = engine, output_previous, + specific_stream, + multiplier_mixin >; + + template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> + using mcg_base = engine, output_previous, + no_stream, + multiplier_mixin >; + + /* + * OUTPUT FUNCTIONS. + * + * These are the core of the PCG generation scheme. They specify how to + * turn the base LCG's internal state into the output value of the final + * generator. + * + * They're implemented as mixin classes. + * + * All of the classes have code that is written to allow it to be applied + * at *arbitrary* bit sizes, although in practice they'll only be used at + * standard sizes supported by C++. + */ + + /* + * XSH RS -- high xorshift, followed by a random shift + * + * Fast. A good performer. + */ + + template + struct xsh_rs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t opbits = + sparebits - 5 >= 64 ? 5 + : sparebits - 4 >= 32 ? 4 + : sparebits - 3 >= 16 ? 3 + : sparebits - 2 >= 4 ? 2 + : sparebits - 1 >= 1 ? 1 + : 0; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t maxrandshift = mask; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = topspare + (xtypebits + maxrandshift) / 2; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> xshift; + xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); + return result; + } + }; + + /* + * XSH RR -- high xorshift, followed by a random rotate + * + * Fast. A good performer. Slightly better statistically than XSH RS. 
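For the 64-bit-state / 32-bit-output case, the template below works out to xshift = 18, bottomspare = 27 and a 5-bit rotate, which is the familiar pcg32 output step. A hand-expanded sketch (not part of the patch):

    #include <cstdint>

    // XSH RR, 64 -> 32: xorshift the high bits, then rotate by the top 5 bits.
    static inline uint32_t xsh_rr_64_32(uint64_t state)
    {
        uint32_t xorshifted = uint32_t(((state >> 18u) ^ state) >> 27u);
        uint32_t rot        = uint32_t(state >> 59u);
        return (xorshifted >> rot) | (xorshifted << ((-rot) & 31u));
    }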
+ */ + + template + struct xsh_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = + xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = + sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask + : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } + }; + + /* + * RXS -- random xorshift + */ + + template + struct rxs_mixin { + static xtype output_rxs(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t extrashift = (xtypebits - shift) / 2; + bitcount_t rshift = shift > 64 + 8 ? (internal >> (bits - 6)) & 63 + : shift > 32 + 4 ? (internal >> (bits - 5)) & 31 + : shift > 16 + 2 ? (internal >> (bits - 4)) & 15 + : shift > 8 + 1 ? (internal >> (bits - 3)) & 7 + : shift > 4 + 1 ? (internal >> (bits - 2)) & 3 + : shift > 2 + 1 ? (internal >> (bits - 1)) & 1 + : 0; + internal ^= internal >> (shift + extrashift - rshift); + xtype result = internal >> rshift; + return result; + } + }; + + /* + * RXS M XS -- random xorshift, mcg multiply, fixed xorshift + * + * The most statistically powerful generator, but all those steps + * make it slower than some of the others. We give it the rottenest jobs. + * + * Because it's usually used in contexts where the state type and the + * result type are the same, it is a permutation and is thus invertable. + * We thus provide a function to invert it. This function is used to + * for the "inside out" generator used by the extended generator. + */ + + /* Defined type-based concepts for the multiplication step. They're actually + * all derived by truncating the 128-bit, which was computed to be a good + * "universal" constant. 
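+     *
+     * Each "unmultiplier" below is the modular multiplicative inverse of the
+     * corresponding multiplier (mod 2^bits); for example, 217 * 105 == 22785,
+     * which is 1 (mod 256). That inverse is what lets rxs_m_xs_mixin::unoutput
+     * undo the multiplication step exactly.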
+ */ + + template + struct mcg_multiplier { + // Not defined for an arbitrary type + }; + + template + struct mcg_unmultiplier { + // Not defined for an arbitrary type + }; + + PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) + PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) + + PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) + PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) + + PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) + PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) + + PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) + PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) + + PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, + PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) + PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, + PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) + + + template + struct rxs_m_xs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + result ^= result >> ((2U * xtypebits + 2U) / 3U); + return result; + } + + static itype unoutput(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = bits >= 128 ? 6 + : bits >= 64 ? 5 + : bits >= 32 ? 4 + : bits >= 16 ? 3 + : 2; + constexpr bitcount_t mask = (1 << opbits) - 1; + + internal = unxorshift(internal, bits, (2U * bits + 2U) / 3U); + + internal *= mcg_unmultiplier::unmultiplier(); + + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal = unxorshift(internal, bits, opbits + rshift); + + return internal; + } + }; + + + /* + * RXS M -- random xorshift, mcg multiply + */ + + template + struct rxs_m_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + return result; + } + }; + + + /* + * DXSM -- double xorshift multiply + * + * This is a new, more powerful output permutation (added in 2019). It's + * a more comprehensive scrambling than RXS M, but runs faster on 128-bit + * types. Although primarily intended for use at large sizes, also works + * at smaller sizes as well. + * + * This permutation is similar to xorshift multiply hash functions, except + * that one of the multipliers is the LCG multiplier (to avoid needing to + * have a second constant) and the other is based on the low-order bits. 
+ * This latter aspect means that the scrambling applied to the high bits + * depends on the low bits, and makes it (to my eye) impractical to back + * out the permutation without having the low-order bits. + */ + + template + struct dxsm_mixin { + inline xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t itypebits = bitcount_t(sizeof(itype) * 8); + static_assert(xtypebits <= itypebits / 2, + "Output type must be half the size of the state type."); + + xtype hi = xtype(internal >> (itypebits - xtypebits)); + xtype lo = xtype(internal); + + lo |= 1; + hi ^= hi >> (xtypebits / 2); + hi *= xtype(cheap_multiplier::multiplier()); + hi ^= hi >> (3 * (xtypebits / 4)); + hi *= lo; + return hi; + } + }; + + + /* + * XSL RR -- fixed xorshift (to low bits), random rotate + * + * Useful for 128-bit types that are split across two CPU registers. + */ + + template + struct xsl_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + bitcount_t rot = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } + }; + + + /* + * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) + * + * Useful for 128-bit types that are split across two CPU registers. + * If you really want an invertable 128-bit RNG, I guess this is the one. + */ + + template struct halfsize_trait {}; + template <> struct halfsize_trait { typedef uint64_t type; }; + template <> struct halfsize_trait { typedef uint32_t type; }; + template <> struct halfsize_trait { typedef uint16_t type; }; + template <> struct halfsize_trait { typedef uint8_t type; }; + + template + struct xsl_rr_rr_mixin { + typedef typename halfsize_trait::type htype; + + static itype output(itype internal) + { + constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - htypebits; + constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 + : htypebits >= 64 ? 6 + : htypebits >= 32 ? 5 + : htypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t xshift = (topspare + htypebits) / 2; + + bitcount_t rot = + opbits ? 
bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + htype lowbits = htype(internal); + lowbits = rotr(lowbits, amprot); + htype highbits = htype(internal >> topspare); + bitcount_t rot2 = lowbits & mask; + bitcount_t amprot2 = (rot2 << amplifier) & mask; + highbits = rotr(highbits, amprot2); + return (itype(highbits) << topspare) ^ itype(lowbits); + } + }; + + + /* + * XSH -- fixed xorshift (to high bits) + * + * You shouldn't use this at 64-bits or less. + */ + + template + struct xsh_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = 0; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } + }; + + /* + * XSL -- fixed xorshift (to low bits) + * + * You shouldn't use this at 64-bits or less. + */ + + template + struct xsl_mixin { + inline xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } + }; + + + /* ---- End of Output Functions ---- */ + + + template + struct inside_out : private baseclass { + inside_out() = delete; + + typedef typename baseclass::result_type result_type; + typedef typename baseclass::state_type state_type; + static_assert(sizeof(result_type) == sizeof(state_type), + "Require a RNG whose output function is a permutation"); + + static bool external_step(result_type& randval, size_t i) + { + state_type state = baseclass::unoutput(randval); + state = state * baseclass::multiplier() + baseclass::increment() + + state_type(i * 2); + result_type result = baseclass::output(state); + randval = result; + state_type zero = + baseclass::is_mcg ? state & state_type(3U) : state_type(0U); + return result == zero; + } + + static bool external_advance(result_type& randval, size_t i, + result_type delta, bool forwards = true) + { + state_type state = baseclass::unoutput(randval); + state_type mult = baseclass::multiplier(); + state_type inc = baseclass::increment() + state_type(i * 2); + state_type zero = + baseclass::is_mcg ? state & state_type(3U) : state_type(0U); + state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); + bool crosses_zero = + forwards ? 
dist_to_zero <= delta + : (-dist_to_zero) <= delta; + if (!forwards) + delta = -delta; + state = baseclass::advance(state, delta, mult, inc); + randval = baseclass::output(state); + return crosses_zero; + } + }; + + + template + class extended : public baseclass { + public: + typedef typename baseclass::state_type state_type; + typedef typename baseclass::result_type result_type; + typedef inside_out insideout; + + private: + static constexpr bitcount_t rtypebits = sizeof(result_type) * 8; + static constexpr bitcount_t stypebits = sizeof(state_type) * 8; + + static constexpr bitcount_t tick_limit_pow2 = 64U; + + static constexpr size_t table_size = 1UL << table_pow2; + static constexpr size_t table_shift = stypebits - table_pow2; + static constexpr state_type table_mask = + (state_type(1U) << table_pow2) - state_type(1U); + + static constexpr bool may_tick = + (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); + static constexpr size_t tick_shift = stypebits - advance_pow2; + static constexpr state_type tick_mask = + may_tick ? state_type( + (uint64_t(1) << (advance_pow2 * may_tick)) - 1) + // ^-- stupidity to appease GCC warnings + : ~state_type(0U); + + static constexpr bool may_tock = stypebits < tick_limit_pow2; + + result_type data_[table_size]; + + PCG_NOINLINE void advance_table(); + + PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); + + result_type& get_extended_value() + { + state_type state = this->state_; + if (kdd && baseclass::is_mcg) { + // The low order bits of an MCG are constant, so drop them. + state >>= 2; + } + size_t index = kdd ? state & table_mask + : state >> table_shift; + + if (may_tick) { + bool tick = kdd ? (state & tick_mask) == state_type(0u) + : (state >> tick_shift) == state_type(0u); + if (tick) + advance_table(); + } + if (may_tock) { + bool tock = state == state_type(0u); + if (tock) + advance_table(); + } + return data_[index]; + } + + public: + static constexpr size_t period_pow2() + { + return baseclass::period_pow2() + table_size * extvalclass::period_pow2(); + } + + PCG_ALWAYS_INLINE result_type operator()() + { + result_type rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + return lhs ^ rhs; + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + + void set(result_type wanted) + { + result_type& rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + rhs = lhs ^ wanted; + } + + void advance(state_type distance, bool forwards = true); + + void backstep(state_type distance) + { + advance(distance, false); + } + + extended(const result_type* data) + : baseclass() + { + datainit(data); + } + + extended(const result_type* data, state_type seed) + : baseclass(seed) + { + datainit(data); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + extended(const result_type* data, state_type seed, + typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + datainit(data); + } + + extended() + : baseclass() + { + selfinit(); + } + + extended(state_type seed) + : baseclass(seed) + { + selfinit(); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. 
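+        // (Concretely: this constructor only takes part in overload resolution
+        // when the base generator defines a stream_state type, i.e. when it
+        // supports selectable streams; otherwise substitution fails silently.)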
+ + template + extended(state_type seed, typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + selfinit(); + } + + private: + void selfinit(); + void datainit(const result_type* data); + + public: + + template::value + && !std::is_convertible::value>::type> + extended(SeedSeq&& seedSeq) + : baseclass(seedSeq) + { + generate_to(seedSeq, data_); + } + + template + void seed(Args&&... args) + { + new (this) extended(std::forward(args)...); + } + + template + friend bool operator==(const extended&, + const extended&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const extended&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + extended&); + + }; + + + template + void extended::datainit( + const result_type* data) + { + for (size_t i = 0; i < table_size; ++i) + data_[i] = data[i]; + } + + template + void extended::selfinit() + { + // We need to fill the extended table with something, and we have + // very little provided data, so we use the base generator to + // produce values. Although not ideal (use a seed sequence, folks!), + // unexpected correlations are mitigated by + // - using XOR differences rather than the number directly + // - the way the table is accessed, its values *won't* be accessed + // in the same order the were written. + // - any strange correlations would only be apparent if we + // were to backstep the generator so that the base generator + // was generating the same values again + result_type lhs = baseclass::operator()(); + result_type rhs = baseclass::operator()(); + result_type xdiff = lhs - rhs; + for (size_t i = 0; i < table_size; ++i) { + data_[i] = baseclass::operator()() ^ xdiff; + } + } + + template + bool operator==(const extended& lhs, + const extended& rhs) + { + auto& base_lhs = static_cast(lhs); + auto& base_rhs = static_cast(rhs); + return base_lhs == base_rhs + && std::equal( + std::begin(lhs.data_), std::end(lhs.data_), + std::begin(rhs.data_) + ); + } + + template + inline bool operator!=(const extended& lhs, + const extended& rhs) + { + return !operator==(lhs, rhs); + } + + template + std::basic_ostream& + operator<<(std::basic_ostream& out, + const extended& rng) + { + using pcg_extras::operator<<; + + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + for (const auto& datum : rng.data_) + out << space << datum; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; + } + + template + std::basic_istream& + operator>>(std::basic_istream& in, + extended& rng) + { + extended new_rng; + auto& base_rng = static_cast(new_rng); + in >> base_rng; + + if (in.fail()) + return in; + + using pcg_extras::operator>>; + + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + for (auto& datum : new_rng.data_) { + in >> datum; + if (in.fail()) + goto bail; + } + + rng = new_rng; + + bail: + in.flags(orig_flags); + return in; + } + + + + template + void + extended::advance_table() + { + bool carry = false; + for (size_t i = 0; i < table_size; ++i) { + if (carry) { + carry = insideout::external_step(data_[i], i + 1); + } + bool carry2 = insideout::external_step(data_[i], i + 1); + carry = carry || carry2; + } + } + + template + void + extended::advance_table( + state_type delta, bool isForwards) + { + typedef typename baseclass::state_type base_state_t; + typedef typename 
extvalclass::state_type ext_state_t; + constexpr bitcount_t basebits = sizeof(base_state_t) * 8; + constexpr bitcount_t extbits = sizeof(ext_state_t) * 8; + static_assert(basebits <= extbits || advance_pow2 > 0, + "Current implementation might overflow its carry"); + + base_state_t carry = 0; + for (size_t i = 0; i < table_size; ++i) { + base_state_t total_delta = carry + delta; + ext_state_t trunc_delta = ext_state_t(total_delta); + if (basebits > extbits) { + carry = total_delta >> extbits; + } + else { + carry = 0; + } + carry += + insideout::external_advance(data_[i], i + 1, trunc_delta, isForwards); + } + } + + template + void extended::advance( + state_type distance, bool forwards) + { + static_assert(kdd, + "Efficient advance is too hard for non-kdd extension. " + "For a weak advance, cast to base class"); + state_type zero = + baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); + if (may_tick) { + state_type ticks = distance >> (advance_pow2 * may_tick); + // ^-- stupidity to appease GCC + // warnings + state_type adv_mask = + baseclass::is_mcg ? tick_mask << 2 : tick_mask; + state_type next_advance_distance = this->distance(zero, adv_mask); + if (!forwards) + next_advance_distance = (-next_advance_distance) & tick_mask; + if (next_advance_distance < (distance & tick_mask)) { + ++ticks; + } + if (ticks) + advance_table(ticks, forwards); + } + if (forwards) { + if (may_tock && this->distance(zero) <= distance) + advance_table(); + baseclass::advance(distance); + } + else { + if (may_tock && -(this->distance(zero)) <= distance) + advance_table(state_type(1U), false); + baseclass::advance(-distance); + } + } + +} // namespace pcg_detail + +namespace pcg_engines { + + using namespace pcg_detail; + + /* Predefined types for XSH RS */ + + typedef oneseq_base oneseq_xsh_rs_16_8; + typedef oneseq_base oneseq_xsh_rs_32_16; + typedef oneseq_base oneseq_xsh_rs_64_32; + typedef oneseq_base oneseq_xsh_rs_128_64; + typedef oneseq_base + cm_oneseq_xsh_rs_128_64; + + typedef unique_base unique_xsh_rs_16_8; + typedef unique_base unique_xsh_rs_32_16; + typedef unique_base unique_xsh_rs_64_32; + typedef unique_base unique_xsh_rs_128_64; + typedef unique_base + cm_unique_xsh_rs_128_64; + + typedef setseq_base setseq_xsh_rs_16_8; + typedef setseq_base setseq_xsh_rs_32_16; + typedef setseq_base setseq_xsh_rs_64_32; + typedef setseq_base setseq_xsh_rs_128_64; + typedef setseq_base + cm_setseq_xsh_rs_128_64; + + typedef mcg_base mcg_xsh_rs_16_8; + typedef mcg_base mcg_xsh_rs_32_16; + typedef mcg_base mcg_xsh_rs_64_32; + typedef mcg_base mcg_xsh_rs_128_64; + typedef mcg_base + cm_mcg_xsh_rs_128_64; + + /* Predefined types for XSH RR */ + + typedef oneseq_base oneseq_xsh_rr_16_8; + typedef oneseq_base oneseq_xsh_rr_32_16; + typedef oneseq_base oneseq_xsh_rr_64_32; + typedef oneseq_base oneseq_xsh_rr_128_64; + typedef oneseq_base + cm_oneseq_xsh_rr_128_64; + + typedef unique_base unique_xsh_rr_16_8; + typedef unique_base unique_xsh_rr_32_16; + typedef unique_base unique_xsh_rr_64_32; + typedef unique_base unique_xsh_rr_128_64; + typedef unique_base + cm_unique_xsh_rr_128_64; + + typedef setseq_base setseq_xsh_rr_16_8; + typedef setseq_base setseq_xsh_rr_32_16; + typedef setseq_base setseq_xsh_rr_64_32; + typedef setseq_base setseq_xsh_rr_128_64; + typedef setseq_base + cm_setseq_xsh_rr_128_64; + + typedef mcg_base mcg_xsh_rr_16_8; + typedef mcg_base mcg_xsh_rr_32_16; + typedef mcg_base mcg_xsh_rr_64_32; + typedef mcg_base mcg_xsh_rr_128_64; + typedef mcg_base + cm_mcg_xsh_rr_128_64; + + + /* 
Predefined types for RXS M XS */ + + typedef oneseq_base oneseq_rxs_m_xs_8_8; + typedef oneseq_base oneseq_rxs_m_xs_16_16; + typedef oneseq_base oneseq_rxs_m_xs_32_32; + typedef oneseq_base oneseq_rxs_m_xs_64_64; + typedef oneseq_base + oneseq_rxs_m_xs_128_128; + typedef oneseq_base + cm_oneseq_rxs_m_xs_128_128; + + typedef unique_base unique_rxs_m_xs_8_8; + typedef unique_base unique_rxs_m_xs_16_16; + typedef unique_base unique_rxs_m_xs_32_32; + typedef unique_base unique_rxs_m_xs_64_64; + typedef unique_base unique_rxs_m_xs_128_128; + typedef unique_base + cm_unique_rxs_m_xs_128_128; + + typedef setseq_base setseq_rxs_m_xs_8_8; + typedef setseq_base setseq_rxs_m_xs_16_16; + typedef setseq_base setseq_rxs_m_xs_32_32; + typedef setseq_base setseq_rxs_m_xs_64_64; + typedef setseq_base setseq_rxs_m_xs_128_128; + typedef setseq_base + cm_setseq_rxs_m_xs_128_128; + + // MCG versions don't make sense here, so aren't defined. + +/* Predefined types for RXS M */ + + typedef oneseq_base oneseq_rxs_m_16_8; + typedef oneseq_base oneseq_rxs_m_32_16; + typedef oneseq_base oneseq_rxs_m_64_32; + typedef oneseq_base oneseq_rxs_m_128_64; + typedef oneseq_base + cm_oneseq_rxs_m_128_64; + + typedef unique_base unique_rxs_m_16_8; + typedef unique_base unique_rxs_m_32_16; + typedef unique_base unique_rxs_m_64_32; + typedef unique_base unique_rxs_m_128_64; + typedef unique_base + cm_unique_rxs_m_128_64; + + typedef setseq_base setseq_rxs_m_16_8; + typedef setseq_base setseq_rxs_m_32_16; + typedef setseq_base setseq_rxs_m_64_32; + typedef setseq_base setseq_rxs_m_128_64; + typedef setseq_base + cm_setseq_rxs_m_128_64; + + typedef mcg_base mcg_rxs_m_16_8; + typedef mcg_base mcg_rxs_m_32_16; + typedef mcg_base mcg_rxs_m_64_32; + typedef mcg_base mcg_rxs_m_128_64; + typedef mcg_base + cm_mcg_rxs_m_128_64; + + /* Predefined types for DXSM */ + + typedef oneseq_base oneseq_dxsm_16_8; + typedef oneseq_base oneseq_dxsm_32_16; + typedef oneseq_base oneseq_dxsm_64_32; + typedef oneseq_base oneseq_dxsm_128_64; + typedef oneseq_base + cm_oneseq_dxsm_128_64; + + typedef unique_base unique_dxsm_16_8; + typedef unique_base unique_dxsm_32_16; + typedef unique_base unique_dxsm_64_32; + typedef unique_base unique_dxsm_128_64; + typedef unique_base + cm_unique_dxsm_128_64; + + typedef setseq_base setseq_dxsm_16_8; + typedef setseq_base setseq_dxsm_32_16; + typedef setseq_base setseq_dxsm_64_32; + typedef setseq_base setseq_dxsm_128_64; + typedef setseq_base + cm_setseq_dxsm_128_64; + + typedef mcg_base mcg_dxsm_16_8; + typedef mcg_base mcg_dxsm_32_16; + typedef mcg_base mcg_dxsm_64_32; + typedef mcg_base mcg_dxsm_128_64; + typedef mcg_base + cm_mcg_dxsm_128_64; + + /* Predefined types for XSL RR (only defined for "large" types) */ + + typedef oneseq_base oneseq_xsl_rr_64_32; + typedef oneseq_base oneseq_xsl_rr_128_64; + typedef oneseq_base + cm_oneseq_xsl_rr_128_64; + + typedef unique_base unique_xsl_rr_64_32; + typedef unique_base unique_xsl_rr_128_64; + typedef unique_base + cm_unique_xsl_rr_128_64; + + typedef setseq_base setseq_xsl_rr_64_32; + typedef setseq_base setseq_xsl_rr_128_64; + typedef setseq_base + cm_setseq_xsl_rr_128_64; + + typedef mcg_base mcg_xsl_rr_64_32; + typedef mcg_base mcg_xsl_rr_128_64; + typedef mcg_base + cm_mcg_xsl_rr_128_64; + + + /* Predefined types for XSL RR RR (only defined for "large" types) */ + + typedef oneseq_base + oneseq_xsl_rr_rr_64_64; + typedef oneseq_base + oneseq_xsl_rr_rr_128_128; + typedef oneseq_base + cm_oneseq_xsl_rr_rr_128_128; + + typedef unique_base + unique_xsl_rr_rr_64_64; 
+ typedef unique_base + unique_xsl_rr_rr_128_128; + typedef unique_base + cm_unique_xsl_rr_rr_128_128; + + typedef setseq_base + setseq_xsl_rr_rr_64_64; + typedef setseq_base + setseq_xsl_rr_rr_128_128; + typedef setseq_base + cm_setseq_xsl_rr_rr_128_128; + + // MCG versions don't make sense here, so aren't defined. + +/* Extended generators */ + + template + using ext_std8 = extended; + + template + using ext_std16 = extended; + + template + using ext_std32 = extended; + + template + using ext_std64 = extended; + + + template + using ext_oneseq_rxs_m_xs_32_32 = + ext_std32; + + template + using ext_mcg_xsh_rs_64_32 = + ext_std32; + + template + using ext_oneseq_xsh_rs_64_32 = + ext_std32; + + template + using ext_setseq_xsh_rr_64_32 = + ext_std32; + + template + using ext_mcg_xsl_rr_128_64 = + ext_std64; + + template + using ext_oneseq_xsl_rr_128_64 = + ext_std64; + + template + using ext_setseq_xsl_rr_128_64 = + ext_std64; + +} // namespace pcg_engines + +typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; +typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; +typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; +typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; + +typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; +typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; +typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; +typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; + +typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; +typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; + +typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; +typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; + + +// These two extended RNGs provide two-dimensionally equidistributed +// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, +// and can be called twice to generate 64 bits, but does not required +// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. 
+ +typedef pcg_engines::ext_setseq_xsh_rr_64_32<1, 16, true> pcg32_k2; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1, 32, true> pcg32_k2_fast; + +// These eight extended RNGs have about as much state as arc4random +// +// - the k variants are k-dimensionally equidistributed +// - the c variants offer are intended to be harder to predict +// +// (neither is intended for use in cryptographic applications) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6, 16, true> pcg32_k64; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6, 32, true> pcg32_k64_oneseq; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6, 32, true> pcg32_k64_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6, 16, false> pcg32_c64; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6, 32, false> pcg32_c64_oneseq; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6, 32, false> pcg32_c64_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<5, 16, true> pcg64_k32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5, 128, true> pcg64_k32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5, 128, true> pcg64_k32_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<5, 16, false> pcg64_c32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5, 128, false> pcg64_c32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5, 128, false> pcg64_c32_fast; + +// These eight extended RNGs have more state than the Mersenne twister +// +// - the k variants are k-dimensionally equidistributed +// - the c variants offer are intended to be harder to predict +// +// (neither is intended for use in cryptographic applications) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10, 16, true> pcg32_k1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10, 32, true> pcg32_k1024_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10, 16, false> pcg32_c1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10, 32, false> pcg32_c1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10, 16, true> pcg64_k1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10, 128, true> pcg64_k1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10, 16, false> pcg64_c1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10, 128, false> pcg64_c1024_fast; + +// These generators have an insanely huge period (2^524352), and is suitable +// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary +// point in the future. [Actually, over the full period of the generator, it +// will produce every 64 KB ZIP file 2^64 times!] + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<14, 16, true> pcg32_k16384; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14, 32, true> pcg32_k16384_fast; + +#ifdef _MSC_VER +#pragma warning(default:4146) +#endif + +#endif // PCG_RAND_HPP_INCLUDED \ No newline at end of file diff --git a/include/pcg_uint128.hpp b/include/pcg_uint128.hpp new file mode 100644 index 0000000..75bc601 --- /dev/null +++ b/include/pcg_uint128.hpp @@ -0,0 +1,1010 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2021 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. 
+ * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + + /* + * This code provides a a C++ class that can provide 128-bit (or higher) + * integers. To produce 2K-bit integers, it uses two K-bit integers, + * placed in a union that allowes the code to also see them as four K/2 bit + * integers (and access them either directly name, or by index). + * + * It may seem like we're reinventing the wheel here, because several + * libraries already exist that support large integers, but most existing + * libraries provide a very generic multiprecision code, but here we're + * operating at a fixed size. Also, most other libraries are fairly + * heavyweight. So we use a direct implementation. Sadly, it's much slower + * than hand-coded assembly or direct CPU support. + */ + +#ifndef PCG_UINT128_HPP_INCLUDED +#define PCG_UINT128_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) // Use MSVC++ intrinsics +#include +#endif + + /* + * We want to lay the type out the same way that a native type would be laid + * out, which means we must know the machine's endian, at compile time. + * This ugliness attempts to do so. + */ + +#ifndef PCG_LITTLE_ENDIAN +#if defined(__BYTE_ORDER__) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define PCG_LITTLE_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define PCG_LITTLE_ENDIAN 0 +#else +#error __BYTE_ORDER__ does not match a standard endian, pick a side +#endif +#elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN +#define PCG_LITTLE_ENDIAN 1 +#elif __BIG_ENDIAN__ || _BIG_ENDIAN +#define PCG_LITTLE_ENDIAN 0 +#elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86 +#define PCG_LITTLE_ENDIAN 1 +#elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \ + || __m68k__ || __mc68000__ +#define PCG_LITTLE_ENDIAN 0 +#else +#error Unable to determine target endianness +#endif +#endif + +#if INTPTR_MAX == INT64_MAX && !defined(PCG_64BIT_SPECIALIZATIONS) +#define PCG_64BIT_SPECIALIZATIONS 1 +#endif + +namespace pcg_extras { + + // Recent versions of GCC have intrinsics we can use to quickly calculate + // the number of leading and trailing zeros in a number. If possible, we + // use them, otherwise we fall back to old-fashioned bit twiddling to figure + // them out. + +#ifndef PCG_BITCOUNT_T + typedef uint8_t bitcount_t; +#else + typedef PCG_BITCOUNT_T bitcount_t; +#endif + + /* + * Provide some useful helper functions + * * flog2 floor(log2(x)) + * * trailingzeros number of trailing zero bits + */ + +#if defined(__GNUC__) // Any GNU-compatible compiler supporting C++11 has + // some useful intrinsics we can use. 
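+    // For reference, the expected behaviour (also for the fallback versions
+    // further below): flog2(12) == 3 and trailingzeros(12) == 2, since
+    // 12 == 0b1100.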
+ + inline bitcount_t flog2(uint32_t v) + { + return 31 - __builtin_clz(v); + } + + inline bitcount_t trailingzeros(uint32_t v) + { + return __builtin_ctz(v); + } + + inline bitcount_t flog2(uint64_t v) + { +#if UINT64_MAX == ULONG_MAX + return 63 - __builtin_clzl(v); +#elif UINT64_MAX == ULLONG_MAX + return 63 - __builtin_clzll(v); +#else +#error Cannot find a function for uint64_t +#endif + } + + inline bitcount_t trailingzeros(uint64_t v) + { +#if UINT64_MAX == ULONG_MAX + return __builtin_ctzl(v); +#elif UINT64_MAX == ULLONG_MAX + return __builtin_ctzll(v); +#else +#error Cannot find a function for uint64_t +#endif + } + +#elif defined(_MSC_VER) // Use MSVC++ intrinsics + +#pragma intrinsic(_BitScanReverse, _BitScanForward) +#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) +#pragma intrinsic(_BitScanReverse64, _BitScanForward64) +#endif + + inline bitcount_t flog2(uint32_t v) + { + unsigned long i; + _BitScanReverse(&i, v); + return bitcount_t(i); + } + + inline bitcount_t trailingzeros(uint32_t v) + { + unsigned long i; + _BitScanForward(&i, v); + return bitcount_t(i); + } + + inline bitcount_t flog2(uint64_t v) + { +#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) + unsigned long i; + _BitScanReverse64(&i, v); + return bitcount_t(i); +#else + // 32-bit x86 + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + return high ? 32 + flog2(high) : flog2(low); +#endif + } + + inline bitcount_t trailingzeros(uint64_t v) + { +#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) + unsigned long i; + _BitScanForward64(&i, v); + return bitcount_t(i); +#else + // 32-bit x86 + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + return low ? trailingzeros(low) : trailingzeros(high) + 32; +#endif + } + +#else // Otherwise, we fall back to bit twiddling + // implementations + + inline bitcount_t flog2(uint32_t v) + { + // Based on code by Eric Cole and Mark Dickinson, which appears at + // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn + + static const uint8_t multiplyDeBruijnBitPos[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27]; + } + + inline bitcount_t trailingzeros(uint32_t v) + { + static const uint8_t multiplyDeBruijnBitPos[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27]; + } + + inline bitcount_t flog2(uint64_t v) + { + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + + return high ? 32 + flog2(high) : flog2(low); + } + + inline bitcount_t trailingzeros(uint64_t v) + { + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + + return low ? trailingzeros(low) : trailingzeros(high) + 32; + } + +#endif + + inline bitcount_t flog2(uint8_t v) + { + return flog2(uint32_t(v)); + } + + inline bitcount_t flog2(uint16_t v) + { + return flog2(uint32_t(v)); + } + +#if __SIZEOF_INT128__ + inline bitcount_t flog2(__uint128_t v) + { + uint64_t high = uint64_t(v >> 64); + uint64_t low = uint64_t(v); + + return high ? 
64 + flog2(high) : flog2(low); + } +#endif + + inline bitcount_t trailingzeros(uint8_t v) + { + return trailingzeros(uint32_t(v)); + } + + inline bitcount_t trailingzeros(uint16_t v) + { + return trailingzeros(uint32_t(v)); + } + +#if __SIZEOF_INT128__ + inline bitcount_t trailingzeros(__uint128_t v) + { + uint64_t high = uint64_t(v >> 64); + uint64_t low = uint64_t(v); + return low ? trailingzeros(low) : trailingzeros(high) + 64; + } +#endif + + template + inline bitcount_t clog2(UInt v) + { + return flog2(v) + ((v & (-v)) != v); + } + + template + inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout) + { + UInt half_result = y + carryin; + UInt result = x + half_result; + *carryout = (half_result < y) || (result < x); + return result; + } + + template + inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout) + { + UInt half_result = y + carryin; + UInt result = x - half_result; + *carryout = (half_result < y) || (result > x); + return result; + } + + + template + class uint_x4 { + // private: + static constexpr unsigned int UINT_BITS = sizeof(UInt) * CHAR_BIT; + public: + union { +#if PCG_LITTLE_ENDIAN + struct { + UInt v0, v1, v2, v3; + } w; + struct { + UIntX2 v01, v23; + } d; +#else + struct { + UInt v3, v2, v1, v0; + } w; + struct { + UIntX2 v23, v01; + } d; +#endif + // For the array access versions, the code that uses the array + // must handle endian itself. Yuck. + UInt wa[4]; + }; + + public: + uint_x4() = default; + + constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0) +#if PCG_LITTLE_ENDIAN + : w{ v0, v1, v2, v3 } +#else + : w{ v3, v2, v1, v0 } +#endif + { + // Nothing (else) to do + } + + constexpr uint_x4(UIntX2 v23, UIntX2 v01) +#if PCG_LITTLE_ENDIAN + : d{ v01,v23 } +#else + : d{ v23,v01 } +#endif + { + // Nothing (else) to do + } + + constexpr uint_x4(UIntX2 v01) +#if PCG_LITTLE_ENDIAN + : d{ v01, UIntX2(0) } +#else + : d{ UIntX2(0),v01 } +#endif + { + // Nothing (else) to do + } + + template::value + && sizeof(Integral) <= sizeof(UIntX2)) + >::type* = nullptr> + constexpr uint_x4(Integral v01) +#if PCG_LITTLE_ENDIAN + : d{ UIntX2(v01), UIntX2(0) } +#else + : d{ UIntX2(0), UIntX2(v01) } +#endif + { + // Nothing (else) to do + } + + explicit constexpr operator UIntX2() const + { + return d.v01; + } + + template::value + && sizeof(Integral) <= sizeof(UIntX2)) + >::type* = nullptr> + explicit constexpr operator Integral() const + { + return Integral(d.v01); + } + + explicit constexpr operator bool() const + { + return d.v01 || d.v23; + } + + template + friend uint_x4 operator*(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator*(const uint_x4&, V); + + template + friend std::pair< uint_x4, uint_x4 > + divmod(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator+(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator-(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator<<(const uint_x4&, const bitcount_t shift); + + template + friend uint_x4 operator>>(const uint_x4&, const bitcount_t shift); + +#if PCG_64BIT_SPECIALIZATIONS + template + friend uint_x4 operator<<(const uint_x4&, const bitcount_t shift); + + template + friend uint_x4 operator>>(const uint_x4&, const bitcount_t shift); +#endif + + template + friend uint_x4 operator&(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator|(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator^(const uint_x4&, const uint_x4&); + + template + friend bool operator==(const uint_x4&, const 
uint_x4&); + + template + friend bool operator!=(const uint_x4&, const uint_x4&); + + template + friend bool operator<(const uint_x4&, const uint_x4&); + + template + friend bool operator<=(const uint_x4&, const uint_x4&); + + template + friend bool operator>(const uint_x4&, const uint_x4&); + + template + friend bool operator>=(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator~(const uint_x4&); + + template + friend uint_x4 operator-(const uint_x4&); + + template + friend bitcount_t flog2(const uint_x4&); + + template + friend bitcount_t trailingzeros(const uint_x4&); + +#if PCG_64BIT_SPECIALIZATIONS + template + friend bitcount_t flog2(const uint_x4&); + + template + friend bitcount_t trailingzeros(const uint_x4&); +#endif + + uint_x4& operator*=(const uint_x4& rhs) + { + uint_x4 result = *this * rhs; + return *this = result; + } + + uint_x4& operator*=(UIntX2 rhs) + { + uint_x4 result = *this * rhs; + return *this = result; + } + + uint_x4& operator/=(const uint_x4& rhs) + { + uint_x4 result = *this / rhs; + return *this = result; + } + + uint_x4& operator%=(const uint_x4& rhs) + { + uint_x4 result = *this % rhs; + return *this = result; + } + + uint_x4& operator+=(const uint_x4& rhs) + { + uint_x4 result = *this + rhs; + return *this = result; + } + + uint_x4& operator-=(const uint_x4& rhs) + { + uint_x4 result = *this - rhs; + return *this = result; + } + + uint_x4& operator&=(const uint_x4& rhs) + { + uint_x4 result = *this & rhs; + return *this = result; + } + + uint_x4& operator|=(const uint_x4& rhs) + { + uint_x4 result = *this | rhs; + return *this = result; + } + + uint_x4& operator^=(const uint_x4& rhs) + { + uint_x4 result = *this ^ rhs; + return *this = result; + } + + uint_x4& operator>>=(bitcount_t shift) + { + uint_x4 result = *this >> shift; + return *this = result; + } + + uint_x4& operator<<=(bitcount_t shift) + { + uint_x4 result = *this << shift; + return *this = result; + } + + }; + + template + bitcount_t flog2(const uint_x4& v) + { +#if PCG_LITTLE_ENDIAN + for (uint8_t i = 4; i != 0; /* dec in loop */) { + --i; +#else + for (uint8_t i = 0; i < 4; ++i) { +#endif + if (v.wa[i] == 0) + continue; + return flog2(v.wa[i]) + uint_x4::UINT_BITS* i; + } + abort(); + } + + template + bitcount_t trailingzeros(const uint_x4&v) + { +#if PCG_LITTLE_ENDIAN + for (uint8_t i = 0; i < 4; ++i) { +#else + for (uint8_t i = 4; i != 0; /* dec in loop */) { + --i; +#endif + if (v.wa[i] != 0) + return trailingzeros(v.wa[i]) + uint_x4::UINT_BITS* i; + } + return uint_x4::UINT_BITS * 4; + } + +#if PCG_64BIT_SPECIALIZATIONS + template + bitcount_t flog2(const uint_x4&v) + { + return v.d.v23 > 0 ? flog2(v.d.v23) + uint_x4::UINT_BITS * 2 + : flog2(v.d.v01); + } + + template + bitcount_t trailingzeros(const uint_x4&v) + { + return v.d.v01 == 0 ? trailingzeros(v.d.v23) + uint_x4::UINT_BITS * 2 + : trailingzeros(v.d.v01); + } +#endif + + template + std::pair< uint_x4, uint_x4 > + divmod(const uint_x4&orig_dividend, + const uint_x4&divisor) + { + // If the dividend is less than the divisor, the answer is always zero. + // This takes care of boundary cases like 0/x (which would otherwise be + // problematic because we can't take the log of zero. (The boundary case + // of division by zero is undefined.) 
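+        // The loop below is plain shift-and-subtract long division: shift the
+        // divisor up by (a slight underestimate of) the log difference, subtract
+        // it while it still fits, halve it when it no longer does, and add the
+        // matching power of two to the quotient at each subtraction.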
+ if (orig_dividend < divisor) + return { uint_x4(UIntX2(0)), orig_dividend }; + + auto dividend = orig_dividend; + + auto log2_divisor = flog2(divisor); + auto log2_dividend = flog2(dividend); + // assert(log2_dividend >= log2_divisor); + bitcount_t logdiff = log2_dividend - log2_divisor; + + constexpr uint_x4 ONE(UIntX2(1)); + if (logdiff == 0) + return { ONE, dividend - divisor }; + + // Now we change the log difference to + // floor(log2(divisor)) - ceil(log2(dividend)) + // to ensure that we *underestimate* the result. + logdiff -= 1; + + uint_x4 quotient(UIntX2(0)); + + auto qfactor = ONE << logdiff; + auto factor = divisor << logdiff; + + do { + dividend -= factor; + quotient += qfactor; + while (dividend < factor) { + factor >>= 1; + qfactor >>= 1; + } + } while (dividend >= divisor); + + return { quotient, dividend }; + } + + template + uint_x4 operator/(const uint_x4÷nd, + const uint_x4&divisor) + { + return divmod(dividend, divisor).first; + } + + template + uint_x4 operator%(const uint_x4÷nd, + const uint_x4&divisor) + { + return divmod(dividend, divisor).second; + } + + + template + uint_x4 operator*(const uint_x4&a, + const uint_x4&b) + { + constexpr auto UINT_BITS = uint_x4::UINT_BITS; + uint_x4 r = { 0U, 0U, 0U, 0U }; + bool carryin = false; + bool carryout; + UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0); + r.w.v0 = UInt(a0b0); + r.w.v1 = UInt(a0b0 >> UINT_BITS); + + UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0); + r.w.v2 = UInt(a1b0 >> UINT_BITS); + r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + carryin = false; + r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); + + r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01; + + return r; + } + + + template + uint_x4 operator*(const uint_x4&a, + UIntX2 b01) + { + constexpr auto UINT_BITS = uint_x4::UINT_BITS; + uint_x4 r = { 0U, 0U, 0U, 0U }; + bool carryin = false; + bool carryout; + UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(UInt(b01)); + r.w.v0 = UInt(a0b0); + r.w.v1 = UInt(a0b0 >> UINT_BITS); + + UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(UInt(b01)); + r.w.v2 = UInt(a1b0 >> UINT_BITS); + r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b01 >> UINT_BITS); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + carryin = false; + r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, 
&carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b01 >> UINT_BITS); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); + + r.d.v23 += a.d.v23 * b01; + + return r; + } + +#if PCG_64BIT_SPECIALIZATIONS +#if defined(_MSC_VER) +#pragma intrinsic(_umul128) +#endif + +#if defined(_MSC_VER) || __SIZEOF_INT128__ + template + uint_x4 operator*(const uint_x4&a, + const uint_x4&b) + { +#if defined(_MSC_VER) + uint64_t hi; + uint64_t lo = _umul128(a.d.v01, b.d.v01, &hi); +#else + __uint128_t r = __uint128_t(a.d.v01) * __uint128_t(b.d.v01); + uint64_t lo = uint64_t(r); + uint64_t hi = r >> 64; +#endif + hi += a.d.v23 * b.d.v01 + a.d.v01 * b.d.v23; + return { hi, lo }; + } +#endif +#endif + + + template + uint_x4 operator+(const uint_x4&a, + const uint_x4&b) + { + uint_x4 r = { 0U, 0U, 0U, 0U }; + + bool carryin = false; + bool carryout; + r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout); + carryin = carryout; + r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + + return r; + } + + template + uint_x4 operator-(const uint_x4&a, + const uint_x4&b) + { + uint_x4 r = { 0U, 0U, 0U, 0U }; + + bool carryin = false; + bool carryout; + r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout); + carryin = carryout; + r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout); + carryin = carryout; + r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout); + carryin = carryout; + r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + + return r; + } + +#if PCG_64BIT_SPECIALIZATIONS + template + uint_x4 operator+(const uint_x4&a, + const uint_x4&b) + { + uint_x4 r = { uint64_t(0u), uint64_t(0u) }; + + bool carryin = false; + bool carryout; + r.d.v01 = addwithcarry(a.d.v01, b.d.v01, carryin, &carryout); + carryin = carryout; + r.d.v23 = addwithcarry(a.d.v23, b.d.v23, carryin, &carryout); + + return r; + } + + template + uint_x4 operator-(const uint_x4&a, + const uint_x4&b) + { + uint_x4 r = { uint64_t(0u), uint64_t(0u) }; + + bool carryin = false; + bool carryout; + r.d.v01 = subwithcarry(a.d.v01, b.d.v01, carryin, &carryout); + carryin = carryout; + r.d.v23 = subwithcarry(a.d.v23, b.d.v23, carryin, &carryout); + + return r; + } +#endif + + template + uint_x4 operator&(const uint_x4&a, + const uint_x4&b) + { + return uint_x4(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01); + } + + template + uint_x4 operator|(const uint_x4&a, + const uint_x4&b) + { + return uint_x4(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01); + } + + template + uint_x4 operator^(const uint_x4&a, + const uint_x4&b) + { + return uint_x4(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01); + } + + template + uint_x4 operator~(const uint_x4&v) + { + return uint_x4(~v.d.v23, ~v.d.v01); + } + + template + uint_x4 operator-(const uint_x4&v) + { + return uint_x4(0UL, 0UL) - v; + } + + template + bool operator==(const uint_x4&a, const uint_x4&b) + { + return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23); + } + + template + bool operator!=(const uint_x4&a, const uint_x4&b) + { + return !operator==(a, b); + } + + + template + bool operator<(const uint_x4&a, const uint_x4&b) + { + return (a.d.v23 < b.d.v23) + || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01)); + } + + template + bool 
operator>(const uint_x4&a, const uint_x4&b) + { + return operator<(b, a); + } + + template + bool operator<=(const uint_x4&a, const uint_x4&b) + { + return !(operator<(b, a)); + } + + template + bool operator>=(const uint_x4&a, const uint_x4&b) + { + return !(operator<(a, b)); + } + + + + template + uint_x4 operator<<(const uint_x4&v, + const bitcount_t shift) + { + uint_x4 r = { 0U, 0U, 0U, 0U }; + const bitcount_t bits = uint_x4::UINT_BITS; + const bitcount_t bitmask = bits - 1; + const bitcount_t shiftdiv = shift / bits; + const bitcount_t shiftmod = shift & bitmask; + + if (shiftmod) { + UInt carryover = 0; +#if PCG_LITTLE_ENDIAN + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#else + for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#endif + r.wa[out] = (v.wa[in] << shiftmod) | carryover; + carryover = (v.wa[in] >> (bits - shiftmod)); + } + } + else { +#if PCG_LITTLE_ENDIAN + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#else + for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#endif + r.wa[out] = v.wa[in]; + } + } + + return r; + } + + template + uint_x4 operator>>(const uint_x4&v, + const bitcount_t shift) + { + uint_x4 r = { 0U, 0U, 0U, 0U }; + const bitcount_t bits = uint_x4::UINT_BITS; + const bitcount_t bitmask = bits - 1; + const bitcount_t shiftdiv = shift / bits; + const bitcount_t shiftmod = shift & bitmask; + + if (shiftmod) { + UInt carryover = 0; +#if PCG_LITTLE_ENDIAN + for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#else + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#endif + r.wa[out] = (v.wa[in] >> shiftmod) | carryover; + carryover = (v.wa[in] << (bits - shiftmod)); + } + } + else { +#if PCG_LITTLE_ENDIAN + for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#else + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#endif + r.wa[out] = v.wa[in]; + } + } + + return r; + } + +#if PCG_64BIT_SPECIALIZATIONS + template + uint_x4 operator<<(const uint_x4&v, + const bitcount_t shift) + { + constexpr bitcount_t bits2 = uint_x4::UINT_BITS * 2; + + if (shift >= bits2) { + return { v.d.v01 << (shift - bits2), uint64_t(0u) }; + } + else { + return { shift ? (v.d.v23 << shift) | (v.d.v01 >> (bits2 - shift)) + : v.d.v23, + v.d.v01 << shift }; + } + } + + template + uint_x4 operator>>(const uint_x4&v, + const bitcount_t shift) + { + constexpr bitcount_t bits2 = uint_x4::UINT_BITS * 2; + + if (shift >= bits2) { + return { uint64_t(0u), v.d.v23 >> (shift - bits2) }; + } + else { + return { v.d.v23 >> shift, + shift ? (v.d.v01 >> shift) | (v.d.v23 << (bits2 - shift)) + : v.d.v01 }; + } + } +#endif + + } // namespace pcg_extras + +#endif // PCG_UINT128_HPP_INCLUDED \ No newline at end of file diff --git a/src/acmaes.cpp b/src/acmaes.cpp new file mode 100644 index 0000000..e42f344 --- /dev/null +++ b/src/acmaes.cpp @@ -0,0 +1,670 @@ +// Copyright (c) Dietmar Wolz. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory. + +// Eigen based implementation of active CMA-ES + +// Supports parallel fitness function evaluation. +// +// For expensive objective functions (e.g. machine learning parameter optimization) use the workers +// parameter to parallelize objective function evaluation. The workers parameter should be limited +// the population size because otherwize poulation update is delayed. 
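+
+// Rough ask/tell usage sketch (driver-side names are illustrative only):
+//
+//   AcmaesOptimizer opt(runid, &fitfun, popsize, mu, guess, inputSigma,
+//                       maxEvaluations, accuracy, stopfitness, stopTolHistFun,
+//                       update_gap, seed);
+//   while (true) {
+//       mat xs = opt.ask_all();               // one candidate per column
+//       mat ys = evaluate_columns(xs);        // user side, may run in parallel
+//       if (opt.tell_all(ys, xs) != 0) break; // nonzero == stop criterion hit
+//   }
+//
+// Note that ask_all() returns candidates in the optimizer's encoded ("normed")
+// coordinates; see getPopulation() / Fitness::decode for mapping them back to
+// the original search space.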
+ +// Derived from http://cma.gforge.inria.fr/cmaes.m which follows +// https://www.researchgate.net/publication/227050324_The_CMA_Evolution_Strategy_A_Comparing_Review + +// Requires Eigen version >= 3.4 because new slicing capabilities are used, see +// https://eigen.tuxfamily.org/dox-devel/group__TutorialSlicingIndexing.html +// requires https://github.com/bab2min/EigenRand for random number generation. + + + +#include "acmaes.hpp" + +using namespace std; + +namespace acmaes { + + static ivec inverse(const ivec& indices) { + ivec inverse = ivec(indices.size()); + for (int i = 0; i < indices.size(); i++) + inverse(indices(i)) = i; + return inverse; + } + + static vec sequence(double start, double end, double step) { + int size = (int)((end - start) / step + 1); + vec d(size); + double value = start; + for (int r = 0; r < size; r++) { + d(r) = value; + value += step; + } + return d; + } + + AcmaesOptimizer::AcmaesOptimizer(long runid_, Fitness* fitfun_, int popsize_, int mu_, + const vec& guess_, const vec& inputSigma_, int maxEvaluations_, + double accuracy_, double stopfitness_, double stopTolHistFun_, + int update_gap_, long seed) { + // runid used for debugging / logging + runid = runid_; + // fitness function to minimize + fitfun = fitfun_; + // initial guess for the arguments of the fitness function + guess = guess_; + // accuracy = 1.0 is default, > 1.0 reduces accuracy + accuracy = accuracy_; + // number of objective variables/problem dimension + dim = guess_.size(); + // population size, offspring number. The primary strategy parameter to play + // with, which can be increased from its default value. Increasing the + // population size improves global search properties in exchange to speed. + // Speed decreases, as a rule, at most linearly with increasing population + // size. It is advisable to begin with the default small population size. + if (popsize_ > 0) + popsize = popsize_; + else + popsize = 4 + int(3. * log(dim)); + // individual sigma values - initial search volume. inputSigma determines + // the initial coordinate wise standard deviations for the search. Setting + // SIGMA one third of the initial search region is appropriate. + if (inputSigma_.size() == 1) + inputSigma = vec::Constant(dim, inputSigma_[0]); + else + inputSigma = inputSigma_; + // overall standard deviation - search volume. + sigma = inputSigma.maxCoeff(); + // termination criteria + // maximal number of evaluations allowed. + maxEvaluations = maxEvaluations_; + // limit for fitness value. + stopfitness = stopfitness_; + // stop if x-changes larger stopTolUpX. + stopTolUpX = 1e3 * sigma; + // stop if x-change smaller stopTolX. + stopTolX = 1e-11 * sigma * accuracy; + // stop if fun-changes smaller stopTolFun. + stopTolFun = 1e-12 * accuracy; + // stop if back fun-changes smaller stopTolHistFun. + stopTolHistFun = stopTolHistFun_ < 0 ? 1e-13 * accuracy : stopTolHistFun_; + // selection strategy parameters + // number of parents/points for recombination. + mu = mu_ > 0 ? mu_ : popsize / 2; + // array for weighted recombination. + weights = (log(sequence(1, mu, 1).array()) * -1.) + log(mu + 0.5); + double sumw = weights.sum(); + double sumwq = weights.squaredNorm(); + weights *= 1. / sumw; + // variance-effectiveness of sum w_i x_i. + mueff = sumw * sumw / sumwq; + + // dynamic strategy parameters and constants + // cumulation constant. + cc = (4. + mueff / dim) / (dim + 4. + 2. * mueff / dim); + // cumulation constant for step-size. + cs = (mueff + 2.) / (dim + mueff + 3.); + // damping for step-size. 
+ damps = (1. + 2. * std::max(0., sqrt((mueff - 1.) / (dim + 1.)) - 1.)) + * max(0.3, + 1. - // modification for short runs + dim / (1e-6 + (maxEvaluations / popsize))) + + cs; // minor increment + // learning rate for rank-one update. + ccov1 = 2. / ((dim + 1.3) * (dim + 1.3) + mueff); + // learning rate for rank-mu update' + ccovmu = min(1. - ccov1, + 2. * (mueff - 2. + 1. / mueff) + / ((dim + 2.) * (dim + 2.) + mueff)); + // expectation of ||N(0,I)|| == norm(randn(N,1)). + chiN = sqrt(dim) * (1. - 1. / (4. * dim) + 1 / (21. * dim * dim)); + ccov1Sep = min(1., ccov1 * (dim + 1.5) / 3.); + ccovmuSep = min(1. - ccov1, ccovmu * (dim + 1.5) / 3.); + // lazy covariance update gap + lazy_update_gap = + update_gap_ >= 0 ? + update_gap_ : + 1.0 / (ccov1 + ccovmu + 1e-23) / dim / 10.0; + // CMA internal values - updated each generation + // objective variables. + xmean = fitfun->encode(guess); + // evolution path. + pc = zeros(dim); + // evolution path for sigma. + ps = zeros(dim); + // norm of ps, stored for efficiency. + normps = ps.norm(); + // coordinate system. + B = Eigen::MatrixXd::Identity(dim, dim); + // diagonal of sqrt(D), stored for efficiency. + diagD = inputSigma / sigma; + diagC = diagD.cwiseProduct(diagD); + // B*D, stored for efficiency. + BD = B.cwiseProduct(diagD.transpose().replicate(dim, 1)); + // covariance matrix. + C = B * (Eigen::MatrixXd::Identity(dim, dim) * B.transpose()); + // number of iterations. + iterations = 1; + // size of history queue of best values. + historySize = 10 + int(3. * 10. * dim / popsize); + // stop criteria + stop = 0; + // best value so far + bestValue = DBL_MAX; + // best parameters so far + bestX = guess; + // history queue of best values. + fitnessHistory = vec::Constant(historySize, DBL_MAX); + fitnessHistory(0) = bestValue; + rs = new pcg64(seed); + + computeArz = true; + fitness = vec(popsize); + arx = mat(dim, popsize); + n_updates = 0; + } + + AcmaesOptimizer::~AcmaesOptimizer() { + delete rs; + } + + // param zmean weighted row matrix of the gaussian random numbers generating the current offspring + // param xold xmean matrix of the previous generation + // return hsig flag indicating a small correction + + bool AcmaesOptimizer::updateEvolutionPaths(const vec& zmean, const vec& xold) { + ps = ps * (1. - cs) + ((B * zmean) * sqrt(cs * (2. - cs) * mueff)); + normps = ps.norm(); + bool hsig = normps / sqrt(1. - pow(1. - cs, 2. * iterations)) / chiN + < 1.4 + 2. / (dim + 1.); + pc *= (1. - cc); + if (hsig) + pc += (xmean - xold) * (sqrt(cc * (2. - cc) * mueff) / sigma); + return hsig; + } + + // param hsig flag indicating a small correction + // param bestArx fitness-sorted matrix of the argument vectors producing the current offspring + // param arz unsorted matrix containing the gaussian random values of the current offspring + // param arindex indices indicating the fitness-order of the current offspring + // param xold xmean matrix of the previous generation + + double AcmaesOptimizer::updateCovariance(bool hsig, const mat& bestArx, const mat& arz, + const ivec& arindex, const mat& xold) { + double negccov = 0; + if (ccov1 + ccovmu > 0) { + mat arpos = (bestArx - xold.replicate(1, mu)) * (1. / sigma); // mu difference vectors + mat roneu = pc * pc.transpose() * ccov1; + // minor correction if hsig==false + double oldFac = hsig ? 0 : ccov1 * cc * (2. - cc); + oldFac += 1. - ccov1 - ccovmu; + // Adapt covariance matrix C active CMA + negccov = (1. - ccovmu) * 0.25 * mueff + / (pow(dim + 2., 1.5) + 2. 
* mueff); + double negminresidualvariance = 0.66; + // keep at least 0.66 in all directions, small popsize are most critical + double negalphaold = 0.5; // where to make up for the variance loss, + // prepare vectors, compute negative updating matrix Cneg + ivec arReverseIndex = arindex.reverse(); + mat arzneg = arz(Eigen::indexing::all, arReverseIndex.head(mu)); + vec arnorms = arzneg.colwise().norm(); + ivec idxnorms = sort_index(arnorms); + vec arnormsSorted = arnorms(idxnorms); + ivec idxReverse = idxnorms.reverse(); + vec arnormsReverse = arnorms(idxReverse); + arnorms = arnormsReverse.cwiseQuotient(arnormsSorted); + vec arnormsInv = arnorms(inverse(idxnorms)); + mat sqarnw = arnormsInv.cwiseProduct(arnormsInv).transpose() + * weights; + double negcovMax = (1. - negminresidualvariance) / sqarnw(0); + if (negccov > negcovMax) + negccov = negcovMax; + arzneg = arzneg.cwiseProduct( + arnormsInv.transpose().replicate(dim, 1)); + mat artmp = BD * arzneg; + mat Cneg = artmp * weights.asDiagonal() * artmp.transpose(); + oldFac += negalphaold * negccov; + C = (C * oldFac) + roneu + + (arpos * (ccovmu + (1. - negalphaold) * negccov) + * weights.replicate(1, dim).cwiseProduct( + arpos.transpose())) - (Cneg * negccov); + } + return negccov; + } + + // Update B and diagD from C + // param negccov Negative covariance factor. + + void AcmaesOptimizer::updateBD(double negccov) { + + if (ccov1 + ccovmu + negccov > 0 + && (std::fmod(iterations, + 1. / (ccov1 + ccovmu + negccov) / dim / 10.)) < 1.) { + // to achieve O(N^2) enforce symmetry to prevent complex numbers + mat triC = C.triangularView(); + mat triC1 = C.triangularView(); + C = triC + triC1.transpose(); + Eigen::SelfAdjointEigenSolver sades; + sades.compute(C); + // diagD defines the scaling + diagD = sades.eigenvalues(); + B = sades.eigenvectors(); + if (diagD.minCoeff() <= 0) { + for (int i = 0; i < dim; i++) + if (diagD(i, 0) < 0) + diagD(i, 0) = 0.; + double tfac = diagD.maxCoeff() / 1e14; + C += Eigen::MatrixXd::Identity(dim, dim) * tfac; + diagD += vec::Constant(dim, 1.0) * tfac; + } + if (diagD.maxCoeff() > 1e14 * diagD.minCoeff()) { + double tfac = diagD.maxCoeff() / 1e14 - diagD.minCoeff(); + C += Eigen::MatrixXd::Identity(dim, dim) * tfac; + diagD += vec::Constant(dim, 1.0) * tfac; + } + diagC = C.diagonal(); + diagD = diagD.cwiseSqrt(); // D contains standard deviations now + BD = B.cwiseProduct(diagD.transpose().replicate(dim, 1)); + + n_updates++; + + } + } + + mat AcmaesOptimizer::ask_all() { // undecoded + // generate popsize offspring. + arz = normal(dim, popsize, *rs); + mat xs(dim, popsize); + for (int k = 0; k < popsize; k++) { + vec delta = (BD * arz.col(k)) * sigma; + xs.col(k) = fitfun->getClosestFeasibleNormed(xmean + delta); + } + computeArz = false; + return xs; + } + + int AcmaesOptimizer::tell_all(mat ys, mat xs) { + told = 0; + for (int p = 0; p < popsize; p++) + tell(ys(p), xs.col(p)); + computeArz = true; + return stop; + } + + int AcmaesOptimizer::tell_all_asked(mat ys, mat xs) { + told = 0; + for (int p = 0; p < popsize; p++) + tell(ys(p), xs.col(p)); + computeArz = false; + return stop; + } + + mat AcmaesOptimizer::getPopulation() { + mat pop(dim, popsize); + for (int p = 0; p < popsize; p++) + pop.col(p) = fitfun->decode(fitfun->getClosestFeasibleNormed(popX.col(p))); + return pop; + } + + vec AcmaesOptimizer::ask() { + // ask for one new argument vector. 
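+ // The candidate is drawn as x = xmean + sigma * (B*D) * z with z ~ N(0, I),
+ // i.e. x ~ N(xmean, sigma^2 * C), and then mapped to the closest feasible
+ // point in normalized coordinates; computeArz is set so that tell() can later
+ // reconstruct z from the returned x.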
+ vec arz1 = normalVec(dim, *rs); + vec delta = (BD * arz1) * sigma; + vec arx1 = fitfun->getClosestFeasibleNormed(xmean + delta); + computeArz = true; + return arx1; + } + + int AcmaesOptimizer::tell(double y, const vec& x) { + //tell function value for a argument list retrieved by ask_one(). + fitness[told] = isfinite(y) ? y : DBL_MAX; + arx.col(told) = x; + told++; + if (told >= popsize) { + xmean = fitfun->getClosestFeasibleNormed(xmean); + if (computeArz) { + try { + arz = (BD.inverse() + * ((arx - xmean.replicate(1, popsize)) / sigma)); + } + catch (std::exception& e) { + arz = normal(dim, popsize, *rs); + } + } + updateCMA(); + told = 0; + iterations += 1; + } + return stop; + } + + void AcmaesOptimizer::updateCMA() { + // sort by fitness and compute weighted mean into xmean + ivec arindex = sort_index(fitness); + // calculate new xmean, this is selection and recombination + vec xold = xmean; // for speed up of Eq. (2) and (3) + ivec bestIndex = arindex.head(mu); + mat bestArx = arx(Eigen::indexing::all, bestIndex); + xmean = bestArx * weights; + mat bestArz = arz(Eigen::indexing::all, bestIndex); + mat zmean = bestArz * weights; + bool hsig = updateEvolutionPaths(zmean, xold); + // adapt step size sigma + sigma *= exp(min(1.0, (normps / chiN - 1.) * cs / damps)); + double bestFitness = fitness(arindex(0)); + double worstFitness = fitness(arindex(arindex.size() - 1)); + if (bestValue > bestFitness) { + bestValue = bestFitness; + bestX = fitfun->decode(bestArx.col(0)); + if (isfinite(stopfitness) && bestFitness < stopfitness) { + stop = 1; + return; + } + } + if (iterations >= last_update + lazy_update_gap) { + last_update = iterations; + double negccov = updateCovariance(hsig, bestArx, arz, arindex, + xold); + updateBD(negccov); + // handle termination criteria + vec sqrtDiagC = diagC.cwiseSqrt(); + vec pcCol = pc; + for (int i = 0; i < dim; i++) { + if (sigma * (max(abs(pcCol[i]), sqrtDiagC[i])) > stopTolX) + break; + if (i >= dim - 1) + stop = 2; + } + if (stop > 0) + return; + for (int i = 0; i < dim; i++) + if (sigma * sqrtDiagC[i] > stopTolUpX) + stop = 3; + if (stop > 0) + return; + } + double historyBest = fitnessHistory.minCoeff(); + double historyWorst = fitnessHistory.maxCoeff(); + if (iterations > 2 + && max(historyWorst, worstFitness) + - min(historyBest, bestFitness) < stopTolFun) { + stop = 4; + return; + } + if (iterations > fitnessHistory.size() + && historyWorst - historyBest < stopTolHistFun) { + stop = 5; + return; + } + // condition number of the covariance matrix exceeds 1e14 + if (diagD.maxCoeff() / diagD.minCoeff() > 1e7 * 1.0 / sqrt(accuracy)) { + stop = 6; + return; + } + // adjust step size in case of equal function values (flat fitness) + if (bestValue == fitness[arindex[(int)(0.1 + popsize / 4.)]]) { + sigma *= exp(0.2 + cs / damps); + } + if (iterations > 2 + && max(historyWorst, bestFitness) + - std::min(historyBest, bestFitness) == 0) { + sigma *= ::exp(0.2 + cs / damps); + } + // store best in history + for (int i = 1; i < fitnessHistory.size(); i++) + fitnessHistory[i] = fitnessHistory[i - 1]; + fitnessHistory[0] = bestFitness; + } + + int AcmaesOptimizer::doOptimize() { + + // -------------------- Generation Loop -------------------------------- + iterations = 0; + fitfun->resetEvaluations(); + while (fitfun->evaluations() < maxEvaluations && !fitfun->terminate()) { + // generate and evaluate popsize offspring + mat xs = ask_all(); + vec ys(popsize); + fitfun->values(xs, ys); // decodes + told = 0; + for (int k = 0; k < popsize && stop == 0; k++) + 
tell(ys(k), xs.col(k)); // tell encoded + if (stop != 0) + return fitfun->evaluations(); + } + return fitfun->evaluations(); + } + + int AcmaesOptimizer::do_optimize_delayed_update(int workers) { + //iterations = 0; + //fitfun->resetEvaluations(); + //evaluator eval(fitfun, 1, workers); + //vec evals_x(workers); + //// fill eval queue with initial population + //for (int i = 0; i < workers; i++) { + // vec x = ask(); + // vec xdec = fitfun->decode(x); + // eval.evaluate(xdec, i); + // evals_x[i] = x; // encoded + //} + //while (fitfun->evaluations() < maxEvaluations) { + // vec_id* vid = eval.result(); + // vec y = vec(vid->_v); + // int p = vid->_id; + // delete vid; + // vec x = evals_x[p]; + // tell(y(0), x); // tell evaluated encoded x + // if (fitfun->evaluations() >= maxEvaluations || stop != 0) + // break; + // x = ask(); + // eval.evaluate(x, p); + // evals_x[p] = x; + //} + //return fitfun->evaluations(); + return 0; + } + + vec AcmaesOptimizer::getBestX() { + return bestX; + } + + double AcmaesOptimizer::getBestValue() { + return bestValue; + } + + double AcmaesOptimizer::getIterations() { + return iterations; + } + + int AcmaesOptimizer::getStop() { + return stop; + } + + Fitness* AcmaesOptimizer::getFitfun() { + return fitfun; + } + + int AcmaesOptimizer::getDim() { + return dim; + } + + int AcmaesOptimizer::getPopsize() { + return popsize; + } + + Fitness* AcmaesOptimizer::getFitfunPar() { + return fitfun; + } +} + +/* +using namespace acmaes; + +extern "C" { + void optimizeACMA_C(long runid, callback_type func, callback_parallel func_par, int dim, + double* init, double* lower, double* upper, double* sigma, + int maxEvals, double stopfitness, double stopTolHistFun, int mu, int popsize, double accuracy, + long seed, bool normalize, bool use_delayed_update, int update_gap, int workers, double* res) { + + vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); + for (int i = 0; i < dim; i++) {// guess is mandatory + guess[i] = init[i]; + inputSigma[i] = sigma[i]; + } + if (lower != NULL && upper != NULL) { + for (int i = 0; i < dim; i++) { + guess[i] = init[i]; + lower_limit[i] = lower[i]; + upper_limit[i] = upper[i]; + } + } + else { + lower_limit.resize(0); + upper_limit.resize(0); + normalize = false; + } + + Fitness fitfun(func, func_par, dim, 1, lower_limit, upper_limit); + fitfun.setNormalize(normalize); + + AcmaesOptimizer opt(runid, &fitfun, popsize, mu, guess, inputSigma, + maxEvals, accuracy, stopfitness, stopTolHistFun, update_gap, seed); + try { + int evals = 0; + if (workers > 1 && use_delayed_update) + evals = opt.do_optimize_delayed_update(workers); + else + evals = opt.doOptimize(); + vec bestX = opt.getBestX(); + double bestY = opt.getBestValue(); + for (int i = 0; i < dim; i++) + res[i] = bestX[i]; + res[dim] = bestY; + res[dim + 1] = evals; + res[dim + 2] = opt.getIterations(); + res[dim + 3] = opt.getStop(); + } + catch (std::exception& e) { + cout << e.what() << endl; + } + } + + uintptr_t initACMA_C(long runid, int dim, + double* init, double* lower, double* upper, double* sigma, + int maxEvals, double stopfitness, double stopTolHistFun, int mu, int popsize, double accuracy, + long seed, bool normalize, bool use_delayed_update, int update_gap) { + + vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); + for (int i = 0; i < dim; i++) {// guess is mandatory + guess[i] = init[i]; + inputSigma[i] = sigma[i]; + } + if (lower != NULL && upper != NULL) { + for (int i = 0; i < dim; i++) { + guess[i] = init[i]; + lower_limit[i] = 
lower[i]; + upper_limit[i] = upper[i]; + } + } + else { + lower_limit.resize(0); + upper_limit.resize(0); + normalize = false; + } + + Fitness* fitfun = new Fitness(noop_callback, noop_callback_par, dim, 1, lower_limit, upper_limit); // never used here + fitfun->setNormalize(normalize); + + AcmaesOptimizer* opt = new AcmaesOptimizer(runid, fitfun, popsize, mu, guess, inputSigma, + maxEvals, accuracy, stopfitness, stopTolHistFun, update_gap, seed); + return (uintptr_t)opt; + } + + void destroyACMA_C(uintptr_t ptr) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + Fitness* fitfun = opt->getFitfun(); + delete fitfun; + delete opt; + } + + void askACMA_C(uintptr_t ptr, double* xs) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + int n = opt->getDim(); + int popsize = opt->getPopsize(); + opt->popX = opt->ask_all(); + Fitness* fitfun = opt->getFitfun(); + for (int p = 0; p < popsize; p++) { + vec x = fitfun->decode(opt->popX.col(p)); + for (int i = 0; i < n; i++) + xs[p * n + i] = x[i]; + } + } + + int tellACMA_C(uintptr_t ptr, double* ys) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + int popsize = opt->getPopsize(); + vec vals(popsize); + for (int i = 0; i < popsize; i++) + vals[i] = ys[i]; + opt->tell_all_asked(vals, opt->popX); + return opt->getStop(); + } + + int tellXACMA_C(uintptr_t ptr, double* ys, double* xs) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + int popsize = opt->getPopsize(); + int dim = opt->getDim(); + Fitness* fitfun = opt->getFitfun(); + opt->popX = mat(dim, popsize); + for (int p = 0; p < popsize; p++) { + vec x(dim); + for (int i = 0; i < dim; i++) + x[i] = xs[p * dim + i]; + opt->popX.col(p) = fitfun->encode(x); + } + vec vals(popsize); + for (int i = 0; i < popsize; i++) + vals[i] = ys[i]; + opt->tell_all(vals, opt->popX); + return opt->getStop(); + } + + int populationACMA_C(uintptr_t ptr, double* xs) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + int dim = opt->getDim(); + int popsize = opt->getPopsize(); + mat popX = opt->getPopulation(); + for (int p = 0; p < popsize; p++) { + vec x = popX.col(p); + for (int i = 0; i < dim; i++) + x[i] = xs[p * dim + i]; + } + return opt->getStop(); + } + + int resultACMA_C(uintptr_t ptr, double* res) { + AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; + vec bestX = opt->getBestX(); + double bestY = opt->getBestValue(); + int n = bestX.size(); + for (int i = 0; i < bestX.size(); i++) + res[i] = bestX[i]; + res[n] = bestY; + Fitness* fitfun = opt->getFitfun(); + res[n + 1] = fitfun->evaluations(); + res[n + 2] = opt->getIterations(); + res[n + 3] = opt->getStop(); + return opt->getStop(); + } + + int testACMA_C(int n, double* res) { + for (int i = 0; i < n; i++) { + cout << i << ": " << res[i] << endl; + res[i] = -res[i]; + } + return 7; + } +} + +*/ \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index c5f1473..e661943 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,7 @@ #include "c_maes.hpp" +#include "acmaes.hpp" #include +#include using namespace std::placeholders; using std::chrono::high_resolution_clock; @@ -8,10 +10,14 @@ using std::chrono::duration; using std::chrono::milliseconds; +static int dim = 100; +static bool rotated = false; +static size_t budget = dim * 1000; + struct Ellipse { size_t evals = 0; - Matrix R; + Matrix R; Ellipse(const int dim, const bool rotated = false) : R{ rotated ? 
functions::random_rotation_matrix(dim, 1): Matrix::Identity(dim, dim) } @@ -49,13 +55,9 @@ struct Timer }; -int main() +void run_modcma() { rng::set_seed(42); - const size_t dim = 100; - const size_t budget = dim * 10000; - const bool rotated = true; - parameters::Modules m; //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; //m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; @@ -77,7 +79,7 @@ int main() //if (cma.p->stats.current_best.y < 1e-8) // break; - + // No rotation // e: Stats t=549 e=5490 // no-e: Stats t=594 e=5940 @@ -94,8 +96,81 @@ int main() // no-e Stats t=568 e=5680 // } - std::cout << cma.p->stats.evaluations << std::endl; - std::cout << cma.p->stats.t << std::endl; - std::cout << cma.p->stats.n_updates << std::endl; - std::cout << cma.p->stats << std::endl; + + std::cout << "modcmaes\n" << std::defaultfloat; + std::cout << "evals: " << cma.p->stats.evaluations << std::endl; + std::cout << "iters: " << cma.p->stats.t << std::endl; + std::cout << "updates: " << cma.p->stats.n_updates << std::endl; + std::cout << "best_y: " << std::scientific << std::setprecision(3) << cma.p->stats.global_best.y << std::endl << std::endl; +} + +void run_acmaes() +{ + Timer t; + double sigma = 2.0; + bool normalize = false; + + vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); + for (int i = 0; i < dim; i++) { + guess[i] = 0.; + inputSigma[i] = sigma; + lower_limit[i] = -5; + upper_limit[i] = 5; + } + + + auto func_par = [](int popsize, int dim, double* x, double* y) { + static FunctionType f = Ellipse(dim, rotated); + //std::cout << "is this called\n"; + + for (int i = 0; i < popsize; i++) + { + auto map = Eigen::Map(x + i * dim, dim); + //std::cout << map.transpose() << std::endl; + y[i] = f(map); + } + }; + + + auto func = [](int popsize, const double* x, double* y) { + static FunctionType f = Ellipse(dim, rotated); + std::cout << "is this called\n"; + return true; + }; + + + + Fitness fitfun(func, func_par, dim, 1, lower_limit, upper_limit); + fitfun.setNormalize(normalize); + + int popsize = 4 + std::floor(3 * std::log((double)dim)); + int mu = popsize / 2; + long seed = 32; + constexpr double accuracy = -std::numeric_limits::infinity(); + constexpr double stop_fitness = -std::numeric_limits::infinity(); + double stopTolHistFun = 0; + int update_gap = -1; + + acmaes::AcmaesOptimizer opt(0, &fitfun, popsize, mu, guess, inputSigma, + budget, accuracy, stop_fitness, stopTolHistFun, update_gap, seed); + + int evals = 0; + + evals = opt.doOptimize(); + vec bestX = opt.getBestX(); + double bestY = opt.getBestValue(); + + std::cout << "acmaes\n" << std::defaultfloat; + std::cout << "evals: " << evals << std::endl; + std::cout << "iters: " << (int)opt.getIterations() << std::endl; + std::cout << "updates: " << opt.n_updates << std::endl; + std::cout << "best_y: " << std::scientific << std::setprecision(3) << bestY << std::endl << std::endl; + //std::cout << bestX.transpose() << std::endl; } + + +int main() +{ + run_modcma(); + run_acmaes(); +} \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index ae2e154..4fb328c 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -131,7 +131,7 @@ namespace matrix_adaptation m_old.setZero(); dm.setZero(); pc.setZero(); - ps.setZero(); + ps.setZero(); } Vector CovarianceAdaptation::compute_y(const Vector& zi) From 579ea6dd941070759a0d0adde3899b9d704ccc57 Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Fri, 30 May 2025 14:22:11 +0200 
Subject: [PATCH 08/74] extra tests --- scripts/timing/test_bbob5d.py | 120 ++++++++++++++++++++++++++++++++++ scripts/timing/test_timing.py | 23 +++++-- setup.py | 14 ++-- src/interface.cpp | 7 +- 4 files changed, 152 insertions(+), 12 deletions(-) create mode 100644 scripts/timing/test_bbob5d.py diff --git a/scripts/timing/test_bbob5d.py b/scripts/timing/test_bbob5d.py new file mode 100644 index 0000000..9a2a963 --- /dev/null +++ b/scripts/timing/test_bbob5d.py @@ -0,0 +1,120 @@ +from time import perf_counter +import ioh +import modcma.c_maes as modcma +import iohinspector as ins +import matplotlib.colors as mcolors +import numpy as np + + +def timeit(f): + def inner(*args, **kwargs): + start = perf_counter() + result = f(*args, **kwargs) + stop = perf_counter() + elapsed = stop - start + return elapsed + return inner + + +@timeit +def run_modma(f: ioh.ProblemType, dim: int, n_evaluations): + modules = modcma.parameters.Modules() + # modules.restart_strategy = modcma.options.RestartStrategy.IPOP + # modules.active = True + settings = modcma.Settings( + dim, + budget=n_evaluations, + target=f.optimum.y + 1e-8, + lb=f.bounds.lb, + ub=f.bounds.ub, + sigma0=2.0, + modules=modules, + verbose=False + ) + cma = modcma.ModularCMAES(settings) + cma.run(f) + return cma + +def fix_legend_labels(ax, n_split, algs, groupby_word = None, reorder=False): + colors = dict(zip(algs, mcolors.TABLEAU_COLORS)) + lines = ax.get_lines()[::] + if reorder: + lines = lines[::2] + lines[1::2] + + for line, line_label in zip(lines[:len(lines)//2], lines[len(lines)//2:]): + if (lab:=line_label.get_label()) in colors: + for l in (line, line_label): + l.set_color(colors[lab]) + l.set_linewidth(3) + if groupby_word is not None and groupby_word in lab: + l.set_linestyle('dashed') + else: + l.set_linestyle('solid') + + handles, labels = ax.get_legend_handles_labels() + labels = [l[n_split:] for l in labels[:]] + idx = np.argsort(labels) + ax.legend(np.array(handles)[idx], np.array(labels)[idx], fancybox=True, shadow=True, fontsize=13) + return handles, labels + +def place_legend_below(ax, handles, labels, show = True, legend_nrow = 1, start_legend = 3, loc_y = -.11): + box = ax.get_position() + ax.set_position([box.x0, box.y0 + box.height * 0.1, + box.width, box.height * 0.9]) + + ax.legend().remove() + if show: + ax.legend(np.array(handles), np.array(labels), loc='upper center', + fontsize=13, bbox_to_anchor=(start_legend, loc_y), fancybox=True, shadow=True, ncol=np.ceil(len(labels) / legend_nrow), + ) + + +if __name__ == "__main__": + # modcma.utils.set_seed(43) + # modcma.constants.calc_eigv = True + # name = f"CMA-ES eig={modcma.constants.calc_eigv}" + + # logger = ioh.logger.Analyzer( + # folder_name=name, + # algorithm_name=name, + # root="data" + # ) + + # dim = 5 + # n_rep = 5 + # n_instances = 5 + + # budget = 50_000 * dim + # for i in range(1, 25): + # for ii in range(1, n_instances + 1): + # problem = ioh.get_problem(i, ii, dim) + # problem.attach_logger(logger) + # for r in range(n_rep): + # run_modma(problem, dim, budget) + # print(problem.state.evaluations, problem.state.current_best_internal.y) + # problem.reset() + + import os + manager = ins.DataManager() + algs = [] + for folder in os.listdir("data"): + algs.append(folder) + manager.add_folder(f"data/{folder}") + + + + import matplotlib.pyplot as plt + + f, axes = plt.subplots(5, 5, figsize=(25, 13), sharex=True, sharey=True) + + x_values = ins.get_sequence(1, 50_000 * 5, 50, True, True) + for fid, ax in enumerate(axes.ravel(), 1): + if fid > 24: + break 
+ dt = manager.select(function_ids=[fid]).load(True, True, x_values=x_values) + ins.plot.single_function_fixedbudget(data=dt, ax=ax) + h,l = fix_legend_labels(ax, 1, algs, None) + place_legend_below(ax, h, l, fid == 24, 1) + + plt.tight_layout() + plt.show() \ No newline at end of file diff --git a/scripts/timing/test_timing.py b/scripts/timing/test_timing.py index c0ffe7b..8fb8c51 100644 --- a/scripts/timing/test_timing.py +++ b/scripts/timing/test_timing.py @@ -36,11 +36,12 @@ def inner(*args, **kwargs): @timeit def run_fcmaes(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): + lamb = 4 + np.floor(3 * np.log(dim)).astype(int) bounds = np.array([f.bounds.lb, f.bounds.ub]) res = optimizer.cmaescpp.minimize( f, x0=x0, max_evaluations=n_evaluations, stop_hist=0, accuracy=1e-10, stop_fitness=-700, - popsize=4 + popsize=lamb, workers=1, delayed_update=False ) @@ -50,10 +51,18 @@ def run_fcmaes(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): @timeit def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): + modcma.constants.calc_eigv = True modules = modcma.parameters.Modules() - modules.sample_transformation = modcma.options.SCALED_UNIFORM + # modules.sample_transformation = modcma.options.SCALED_UNIFORM modules.matrix_adaptation = modcma.options.COVARIANCE - settings = modcma.Settings(dim, budget=n_evaluations, x0=x0, modules=modules, verbose=True) + settings = modcma.Settings(dim, + budget=n_evaluations, + x0=x0, + modules=modules, + lb=f.bounds.lb, + ub=f.bounds.ub, + verbose=True + ) cma = modcma.ModularCMAES(settings) @@ -61,8 +70,10 @@ def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): maxp = 1/(10 * dim * (cma.p.weights.c1 +cma.p.weights.cmu)) # print(dim, max(1, maxp), maxp) # breakpoint() - - cma.run(f) + + while cma.step(f): + pass + # cma.run(f) print(cma.p.stats.t, cma.p.stats.n_updates) assert f.state.evaluations >= n_evaluations return cma @@ -92,7 +103,7 @@ def run_pycma(f: ioh.ProblemType, dim: int, n_evaluations: int, x0: np.ndarray): n_iters = 1 n_evals = 1_000 fid = 12 - dimensions = [100] + dimensions = [50] names, functions = zip( *[ (name, obj) diff --git a/setup.py b/setup.py index c62b0b5..a5c0f9d 100644 --- a/setup.py +++ b/setup.py @@ -20,14 +20,18 @@ cxx_std=17, ) if platform.system() in ("Linux", "Darwin"): - os.environ["CC"] = "g++" - os.environ["CXX"] = "g++" - flags = ["-O3", "-fno-math-errno", "-msse2", "-mavx", "-mfma", "-mtune=native"] + os.environ["CC"] = "clang" + os.environ["CXX"] = "clang" + flags = [ + "-O3", + # "-fno-math-errno", "-msse2", "-mavx", "-mfma", "-mtune=native", + # "-march=native", "-ffast-math", "-flto", "-funroll-loops", "-ftree-vectorize" + ] if platform.system() == "Darwin": flags.append("-mmacosx-version-min=10.15") - else: - flags.append("-march=native") + # else: + # flags.append("-march=native") ext._add_cflags(flags) ext._add_ldflags(flags) diff --git a/src/interface.cpp b/src/interface.cpp index e2a7b4e..8cebeac 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -898,7 +898,6 @@ class constants_w void define_constants(py::module &m) { py::class_(m, "constants") - .def_property_static( "cache_max_doubles", [](py::object) @@ -923,6 +922,12 @@ void define_constants(py::module &m) { return constants::clip_sigma; }, [](py::object, bool a) { constants::clip_sigma = a; }) + .def_property_static( + "calc_eigv", + [](py::object) + { return constants::calc_eigv; }, + [](py::object, bool a) + { constants::calc_eigv = a; }) ; } From 
2c184e71c008acb48ee1f47880e7f3abf50f4868 Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Fri, 30 May 2025 15:20:37 +0200 Subject: [PATCH 09/74] remove uneeded --- .../cmake/ComputeCppCompilerChecks.cmake | 50 - .../eigen-3.4.0/cmake/ComputeCppIRMap.cmake | 18 - .../eigen-3.4.0/cmake/Eigen3Config.cmake.in | 23 - .../cmake/Eigen3ConfigLegacy.cmake.in | 30 - .../cmake/EigenConfigureTesting.cmake | 58 - .../cmake/EigenDetermineOSVersion.cmake | 46 - .../cmake/EigenDetermineVSServicePack.cmake | 41 - .../cmake/EigenSmokeTestList.cmake | 131 -- external/eigen-3.4.0/cmake/EigenTesting.cmake | 782 --------- .../eigen-3.4.0/cmake/EigenUninstall.cmake | 40 - external/eigen-3.4.0/cmake/FindAdolc.cmake | 20 - external/eigen-3.4.0/cmake/FindBLAS.cmake | 1407 ----------------- external/eigen-3.4.0/cmake/FindBLASEXT.cmake | 384 ----- external/eigen-3.4.0/cmake/FindCHOLMOD.cmake | 89 -- .../eigen-3.4.0/cmake/FindComputeCpp.cmake | 455 ------ external/eigen-3.4.0/cmake/FindEigen2.cmake | 80 - external/eigen-3.4.0/cmake/FindEigen3.cmake | 107 -- external/eigen-3.4.0/cmake/FindFFTW.cmake | 120 -- external/eigen-3.4.0/cmake/FindGLEW.cmake | 105 -- external/eigen-3.4.0/cmake/FindGMP.cmake | 21 - external/eigen-3.4.0/cmake/FindGSL.cmake | 170 -- .../eigen-3.4.0/cmake/FindGoogleHash.cmake | 23 - external/eigen-3.4.0/cmake/FindHWLOC.cmake | 332 ---- external/eigen-3.4.0/cmake/FindKLU.cmake | 48 - external/eigen-3.4.0/cmake/FindLAPACK.cmake | 274 ---- external/eigen-3.4.0/cmake/FindMPFR.cmake | 83 - external/eigen-3.4.0/cmake/FindMPREAL.cmake | 103 -- external/eigen-3.4.0/cmake/FindMetis.cmake | 265 ---- external/eigen-3.4.0/cmake/FindPASTIX.cmake | 704 --------- external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake | 422 ----- external/eigen-3.4.0/cmake/FindSCOTCH.cmake | 370 ----- external/eigen-3.4.0/cmake/FindSPQR.cmake | 41 - .../cmake/FindStandardMathLibrary.cmake | 70 - external/eigen-3.4.0/cmake/FindSuperLU.cmake | 97 -- external/eigen-3.4.0/cmake/FindTriSYCL.cmake | 173 -- external/eigen-3.4.0/cmake/FindUMFPACK.cmake | 53 - external/eigen-3.4.0/cmake/RegexUtils.cmake | 19 - external/eigen-3.4.0/cmake/UseEigen3.cmake | 6 - scripts/timing/test_timing.py | 40 +- setup.py | 43 +- 40 files changed, 44 insertions(+), 7299 deletions(-) delete mode 100644 external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake delete mode 100644 external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake delete mode 100644 external/eigen-3.4.0/cmake/Eigen3Config.cmake.in delete mode 100644 external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in delete mode 100644 external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake delete mode 100644 external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake delete mode 100644 external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake delete mode 100644 external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake delete mode 100644 external/eigen-3.4.0/cmake/EigenTesting.cmake delete mode 100644 external/eigen-3.4.0/cmake/EigenUninstall.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindAdolc.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindBLAS.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindBLASEXT.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindCHOLMOD.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindComputeCpp.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindEigen2.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindEigen3.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindFFTW.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindGLEW.cmake delete mode 100644 
external/eigen-3.4.0/cmake/FindGMP.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindGSL.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindGoogleHash.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindHWLOC.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindKLU.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindLAPACK.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindMPFR.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindMPREAL.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindMetis.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindPASTIX.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindSCOTCH.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindSPQR.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindSuperLU.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindTriSYCL.cmake delete mode 100644 external/eigen-3.4.0/cmake/FindUMFPACK.cmake delete mode 100644 external/eigen-3.4.0/cmake/RegexUtils.cmake delete mode 100644 external/eigen-3.4.0/cmake/UseEigen3.cmake diff --git a/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake b/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake deleted file mode 100644 index 1807485..0000000 --- a/external/eigen-3.4.0/cmake/ComputeCppCompilerChecks.cmake +++ /dev/null @@ -1,50 +0,0 @@ -cmake_minimum_required(VERSION 3.4.3) - -if(CMAKE_COMPILER_IS_GNUCXX) - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - message(FATAL_ERROR "host compiler - gcc version must be > 4.8") - endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) - message(FATAL_ERROR "host compiler - clang version must be > 3.6") - endif() -endif() - -if(MSVC) - set(ComputeCpp_STL_CHECK_SRC __STL_check) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp - "#include \n" - "int main() { return 0; }\n") - execute_process( - COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - -isystem ${ComputeCpp_INCLUDE_DIRS} - -o ${ComputeCpp_STL_CHECK_SRC}.sycl - -c ${ComputeCpp_STL_CHECK_SRC}.cpp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT - ERROR_QUIET - OUTPUT_QUIET) - if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) - # Try disabling compiler version checks - execute_process( - COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH - -isystem ${ComputeCpp_INCLUDE_DIRS} - -o ${ComputeCpp_STL_CHECK_SRC}.cpp.sycl - -c ${ComputeCpp_STL_CHECK_SRC}.cpp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT - ERROR_QUIET - OUTPUT_QUIET) - if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) - message(STATUS "Device compiler cannot consume hosted STL headers. Using any parts of the STL will likely result in device compiler errors.") - else() - message(STATUS "Device compiler does not meet certain STL version requirements. 
Disabling version checks and hoping for the best.") - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH) - endif() - endif() - file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp - ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl) -endif(MSVC) diff --git a/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake b/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake deleted file mode 100644 index 942d91d..0000000 --- a/external/eigen-3.4.0/cmake/ComputeCppIRMap.cmake +++ /dev/null @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 3.4.3) - -# These should match the types of IR output by compute++ -set(IR_MAP_spir bc) -set(IR_MAP_spir64 bc) -set(IR_MAP_spir32 bc) -set(IR_MAP_spirv spv) -set(IR_MAP_spirv64 spv) -set(IR_MAP_spirv32 spv) -set(IR_MAP_aorta-x86_64 o) -set(IR_MAP_aorta-aarch64 o) -set(IR_MAP_aorta-rcar-cve o) -set(IR_MAP_custom-spir64 bc) -set(IR_MAP_custom-spir32 bc) -set(IR_MAP_custom-spirv64 spv) -set(IR_MAP_custom-spirv32 spv) -set(IR_MAP_ptx64 s) -set(IR_MAP_amdgcn s) diff --git a/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in b/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in deleted file mode 100644 index 0a1ac61..0000000 --- a/external/eigen-3.4.0/cmake/Eigen3Config.cmake.in +++ /dev/null @@ -1,23 +0,0 @@ -# This file exports the Eigen3::Eigen CMake target which should be passed to the -# target_link_libraries command. - -@PACKAGE_INIT@ - -if (NOT TARGET eigen) - include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake") -endif () - -# Legacy variables, do *not* use. May be removed in the future. - -set (EIGEN3_FOUND 1) -set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake") - -set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@") -set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@") -set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@") -set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@") - -set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@") -set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@") -set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@") -set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@") diff --git a/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in b/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in deleted file mode 100644 index 62d7224..0000000 --- a/external/eigen-3.4.0/cmake/Eigen3ConfigLegacy.cmake.in +++ /dev/null @@ -1,30 +0,0 @@ -# -*- cmake -*- -# -# Eigen3Config.cmake(.in) - -# Use the following variables to compile and link against Eigen: -# EIGEN3_FOUND - True if Eigen was found on your system -# EIGEN3_USE_FILE - The file making Eigen usable -# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen -# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found -# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies -# EIGEN3_ROOT_DIR - The base directory of Eigen -# EIGEN3_VERSION_STRING - A human-readable string containing the version -# EIGEN3_VERSION_MAJOR - The major version of Eigen -# EIGEN3_VERSION_MINOR - The minor version of Eigen -# EIGEN3_VERSION_PATCH - The patch version of Eigen - -@PACKAGE_INIT@ - -set ( EIGEN3_FOUND 1 ) -set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" ) - -set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" ) -set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" ) -set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" ) -set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" ) - -set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" ) -set ( EIGEN3_VERSION_MAJOR 
"@EIGEN_VERSION_MAJOR@" ) -set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" ) -set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" ) diff --git a/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake b/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake deleted file mode 100644 index 9cb3bb2..0000000 --- a/external/eigen-3.4.0/cmake/EigenConfigureTesting.cmake +++ /dev/null @@ -1,58 +0,0 @@ -include(EigenTesting) -include(CheckCXXSourceCompiles) - -# configure the "site" and "buildname" -ei_set_sitename() - -# retrieve and store the build string -ei_set_build_string() - -add_custom_target(buildtests) -add_custom_target(check COMMAND "ctest") -add_dependencies(check buildtests) - -# check whether /bin/bash exists (disabled as not used anymore) -# find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH) - -# This call activates testing and generates the DartConfiguration.tcl -include(CTest) - -set(EIGEN_TEST_BUILD_FLAGS "" CACHE STRING "Options passed to the build command of unit tests") -set(EIGEN_DASHBOARD_BUILD_TARGET "buildtests" CACHE STRING "Target to be built in dashboard mode, default is buildtests") -set(EIGEN_CTEST_ERROR_EXCEPTION "" CACHE STRING "Regular expression for build error messages to be filtered out") - -# Overwrite default DartConfiguration.tcl such that ctest can build our unit tests. -# Recall that our unit tests are not in the "all" target, so we have to explicitly ask ctest to build our custom 'buildtests' target. -# At this stage, we can also add custom flags to the build tool through the user defined EIGEN_TEST_BUILD_FLAGS variable. -file(READ "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" EIGEN_DART_CONFIG_FILE) -# try to grab the default flags -string(REGEX MATCH "MakeCommand:.*-- (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE}) -if(NOT CMAKE_MATCH_1) -string(REGEX MATCH "MakeCommand:.*[^c]make (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE}) -endif() -string(REGEX REPLACE "MakeCommand:.*DefaultCTestConfigurationType" "MakeCommand: ${CMAKE_COMMAND} --build . 
--target ${EIGEN_DASHBOARD_BUILD_TARGET} --config \"\${CTEST_CONFIGURATION_TYPE}\" -- ${CMAKE_MATCH_1} ${EIGEN_TEST_BUILD_FLAGS}\nDefaultCTestConfigurationType" - EIGEN_DART_CONFIG_FILE2 ${EIGEN_DART_CONFIG_FILE}) -file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" ${EIGEN_DART_CONFIG_FILE2}) - -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake) - -# some documentation of this function would be nice -ei_init_testing() - -# configure Eigen related testing options -option(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions using exceptions" OFF) -option(EIGEN_DEBUG_ASSERTS "Enable advanced debugging of assertions" OFF) - -if(CMAKE_COMPILER_IS_GNUCXX) - option(EIGEN_COVERAGE_TESTING "Enable/disable gcov" OFF) - if(EIGEN_COVERAGE_TESTING) - set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage") - set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}") - endif() - -elseif(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS") -endif() - - diff --git a/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake b/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake deleted file mode 100644 index 9246fa6..0000000 --- a/external/eigen-3.4.0/cmake/EigenDetermineOSVersion.cmake +++ /dev/null @@ -1,46 +0,0 @@ -# The utility function DetermineOSVersion aims at providing an -# improved version of the CMake variable ${CMAKE_SYSTEM} on Windows -# machines. -# -# Usage: -# include(EigenDetermineOSVersion) -# DetermineOSVersion(OS_VERSION) -# message("OS: ${OS_VERSION}") - -# - A little helper variable which should not be directly called -function(DetermineShortWindowsName WIN_VERSION win_num_version) - if (${win_num_version} VERSION_EQUAL "6.1") - set(_version "win7") - elseif(${win_num_version} VERSION_EQUAL "6.0") - set(_version "winVista") - elseif(${win_num_version} VERSION_EQUAL "5.2") - set(_version "winXpProf") - elseif(${win_num_version} VERSION_EQUAL "5.1") - set(_version "winXp") - elseif(${win_num_version} VERSION_EQUAL "5.0") - set(_version "win2000Prof") - else() - set(_version "unknownWin") - endif() - set(${WIN_VERSION} ${_version} PARENT_SCOPE) -endfunction() - -function(DetermineOSVersion OS_VERSION) - if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows) - file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL) - exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output) - - string(REGEX MATCHALL "[0-9]+" - ver_list "${ver_output}") - list(GET ver_list 0 _major) - list(GET ver_list 1 _minor) - - set(win_num_version ${_major}.${_minor}) - DetermineShortWindowsName(win_version "${win_num_version}") - if(win_version) - set(${OS_VERSION} ${win_version} PARENT_SCOPE) - endif() - else() - set(${OS_VERSION} ${CMAKE_SYSTEM} PARENT_SCOPE) - endif() -endfunction() diff --git a/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake b/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake deleted file mode 100644 index fed7819..0000000 --- a/external/eigen-3.4.0/cmake/EigenDetermineVSServicePack.cmake +++ /dev/null @@ -1,41 +0,0 @@ -include(CMakeDetermineVSServicePack) - -# The code is almost identical to the CMake version. The only difference is that we remove -# _DetermineVSServicePack_FastCheckVersionWithCompiler which lead to errors on some systems. 
-function(EigenDetermineVSServicePack _pack) - if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack}) - if(NOT DETERMINED_VS_SERVICE_PACK) - _DetermineVSServicePack_CheckVersionWithTryCompile(DETERMINED_VS_SERVICE_PACK _cl_version) - if(NOT DETERMINED_VS_SERVICE_PACK) - _DetermineVSServicePack_CheckVersionWithTryRun(DETERMINED_VS_SERVICE_PACK _cl_version) - endif() - endif() - - if(DETERMINED_VS_SERVICE_PACK) - if(_cl_version) - # Call helper function to determine VS version - _DetermineVSServicePackFromCompiler(_sp "${_cl_version}") - - # temporary fix, until CMake catches up - if (NOT _sp) - if(${_cl_version} VERSION_EQUAL "17.00.50727.1") - set(_sp "vc110") - elseif(${_cl_version} VERSION_EQUAL "17.00.51106.1") - set(_sp "vc110sp1") - elseif(${_cl_version} VERSION_EQUAL "17.00.60315.1") - set(_sp "vc110sp2") - elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1") - set(_sp "vc110sp3") - else() - set(_sp ${CMAKE_CXX_COMPILER_VERSION}) - endif() - endif() - - if(_sp) - set(${_pack} ${_sp} CACHE INTERNAL - "The Visual Studio Release with Service Pack") - endif() - endif() - endif() - endif() -endfunction() diff --git a/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake b/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake deleted file mode 100644 index 6f0f724..0000000 --- a/external/eigen-3.4.0/cmake/EigenSmokeTestList.cmake +++ /dev/null @@ -1,131 +0,0 @@ -# List of tests that will be build and run during Eigen's smoke testing. If one -# of these tests doesn't exists or cannot be build with the current configuration -# it will just be skipped. -set(ei_smoke_test_list - adjoint_1 - alignedvector3 - array_cwise_7 - array_cwise_8 - array_for_matrix_1 - array_of_string - array_replicate_1 - array_reverse_1 - autodiff_1 - autodiff_scalar_1 - bandmatrix - bdcsvd_9 - bessel_functions_1 - bfloat16_float - blasutil_1 - block_5 - BVH - cholesky_1 - cholmod_support_23 - cholmod_support_24 - conservative_resize_1 - constructor_1 - corners_1 - ctorleakmiscmatrices_4 - dense_storage - determinant_1 - diagonal_1 - diagonal_2 - diagonalmatrices_1 - dynalloc - eigensolver_complex_1 - eigensolver_selfadjoint_8 - EulerAngles_1 - exceptions - fastmath - first_aligned - geo_alignedbox_2 - geo_eulerangles_1 - geo_homogeneous_1 - geo_hyperplane_1 - geo_orthomethods_1 - geo_parametrizedline_1 - geo_transformations_7 - half_float - hessenberg_1 - hessenberg_6qr_10 - householder_8 - indexed_view_1 - inplace_decomposition_1 - integer_types_1 - inverse_1 - is_same_dense - jacobi_1 - jacobisvd_1 - kronecker_product - linearstructure_1 - mapped_matrix_1 - mapstaticmethods_1 - mapstride_1 - matrix_square_root_1 - meta - minres_2 - miscmatrices_1 - mixingtypes_7 - nestbyvalue - nesting_ops_1 - nomalloc_1 - nullary_1 - num_dimensions - NumericalDiff - numext - packetmath - permutationmatrices_1 - polynomialsolver_1 - prec_inverse_4x4_1 - product_extra_5 - product_selfadjoint_1 - product_small_7 - product_symm_1 - product_syrk_1 - product_trmm_1 - product_trmv_1 - product_trsolve_5 - qr_1 - qr_colpivoting_7 - qr_fullpivoting_4 - rand - real_qz_1 - redux_1 - ref_1 - resize - rvalue_types_1 - schur_complex_1 - schur_real_1 - selfadjoint_1 - sizeof - sizeoverflow - smallvectors - sparse_basic_3 - sparse_block_1 - sparse_extra_4 - sparse_permutations_2 - sparse_product_4 - sparse_ref_1 - sparse_solvers_1 - sparse_vector_1 - special_functions_1 - special_numbers_1 - special_packetmath_1 - spqr_support_2 - stable_norm_1 - stddeque_1 - stddeque_overload_1 - stdlist_1 - stdlist_overload_1 - stdvector_1 - stdvector_overload_1 - 
stl_iterators_1 - swap_1 - symbolic_index_1 - triangular_1 - type_aliaslu_9 - umeyama_3 - unalignedassert - unalignedcount - vectorwiseop_1 - visitor_1) \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/EigenTesting.cmake b/external/eigen-3.4.0/cmake/EigenTesting.cmake deleted file mode 100644 index eb8457d..0000000 --- a/external/eigen-3.4.0/cmake/EigenTesting.cmake +++ /dev/null @@ -1,782 +0,0 @@ - -macro(ei_add_property prop value) - get_property(previous GLOBAL PROPERTY ${prop}) - if ((NOT previous) OR (previous STREQUAL "")) - set_property(GLOBAL PROPERTY ${prop} "${value}") - else() - set_property(GLOBAL PROPERTY ${prop} "${previous} ${value}") - endif() -endmacro() - -#internal. See documentation of ei_add_test for details. -macro(ei_add_test_internal testname testname_with_suffix) - set(targetname ${testname_with_suffix}) - - if(EIGEN_ADD_TEST_FILENAME_EXTENSION) - set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) - else() - set(filename ${testname}.cpp) - endif() - - # Add the current target to the list of subtest targets - get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST) - set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}${targetname}\n") - set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") - - if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) - if(EIGEN_TEST_HIP) - hip_reset_flags() - hip_add_executable(${targetname} ${filename} HIPCC_OPTIONS "-DEIGEN_USE_HIP ${ARGV2}") - elseif(EIGEN_TEST_CUDA_CLANG) - set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) - - if(CUDA_64_BIT_DEVICE_CODE AND (EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64")) - link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") - else() - link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") - endif() - - if (${ARGC} GREATER 2) - add_executable(${targetname} ${filename}) - else() - add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) - endif() - set(CUDA_CLANG_LINK_LIBRARIES "cudart_static" "cuda" "dl" "pthread") - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(CUDA_CLANG_LINK_LIBRARIES ${CUDA_CLANG_LINK_LIBRARIES} "rt") - endif() - target_link_libraries(${targetname} ${CUDA_CLANG_LINK_LIBRARIES}) - else() - if (${ARGC} GREATER 2) - cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) - else() - cuda_add_executable(${targetname} ${filename}) - endif() - endif() - else() - add_executable(${targetname} ${filename}) - endif() - - if (targetname MATCHES "^eigen2_") - add_dependencies(eigen2_buildtests ${targetname}) - else() - add_dependencies(buildtests ${targetname}) - endif() - - if(EIGEN_NO_ASSERTION_CHECKING) - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") - else() - if(EIGEN_DEBUG_ASSERTS) - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") - endif() - endif() - - ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") - - if(MSVC) - ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") - endif() - - # let the user pass flags. 
- if(${ARGC} GREATER 2) - ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") - endif() - - if(EIGEN_TEST_CUSTOM_CXX_FLAGS) - ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") - endif() - - if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) - target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) - endif() - if(EXTERNAL_LIBS) - target_link_libraries(${targetname} ${EXTERNAL_LIBS}) - endif() - if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) - target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) - endif() - - if(${ARGC} GREATER 3) - set(libs_to_link ${ARGV3}) - # it could be that some cmake module provides a bad library string " " (just spaces), - # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors). - # so we check for strings containing only spaces. - string(STRIP "${libs_to_link}" libs_to_link_stripped) - string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length) - if(${libs_to_link_stripped_length} GREATER 0) - # notice: no double quotes around ${libs_to_link} here. It may be a list. - target_link_libraries(${targetname} ${libs_to_link}) - endif() - endif() - - add_test(${testname_with_suffix} "${targetname}") - - # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT - get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) - if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) - set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") - add_dependencies("Build${current_subproject}" ${targetname}) - set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") - endif() - if(EIGEN_SYCL) - # Force include of the SYCL file at the end to avoid errors. - set_property(TARGET ${targetname} PROPERTY COMPUTECPP_INCLUDE_AFTER 1) - # Set COMPILE_FLAGS to COMPILE_DEFINITIONS instead to avoid having to duplicate the flags - # to the device compiler. - get_target_property(target_compile_flags ${targetname} COMPILE_FLAGS) - separate_arguments(target_compile_flags) - foreach(flag ${target_compile_flags}) - if(${flag} MATCHES "^-D.*") - string(REPLACE "-D" "" definition_flag ${flag}) - set_property(TARGET ${targetname} APPEND PROPERTY COMPILE_DEFINITIONS ${definition_flag}) - list(REMOVE_ITEM target_compile_flags ${flag}) - endif() - endforeach() - set_property(TARGET ${targetname} PROPERTY COMPILE_FLAGS ${target_compile_flags}) - # Link against pthread and add sycl to target - set(THREADS_PREFER_PTHREAD_FLAG ON) - find_package(Threads REQUIRED) - target_link_libraries(${targetname} Threads::Threads) - add_sycl_to_target(TARGET ${targetname} SOURCES ${filename}) - endif(EIGEN_SYCL) -endmacro(ei_add_test_internal) -# Macro to add a test -# -# the unique mandatory parameter testname must correspond to a file -# .cpp which follows this pattern: -# -# #include "main.h" -# void test_() { ... } -# -# Depending on the contents of that file, this macro can have 2 behaviors, -# see below. -# -# The optional 2nd parameter is libraries to link to. -# -# A. Default behavior -# -# this macro adds an executable as well as a ctest test -# named too. -# -# On platforms with bash simply run: -# "ctest -V" or "ctest -V -R " -# On other platform use ctest as usual -# -# B. Multi-part behavior -# -# If the source file matches the regexp -# CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+ -# then it is interpreted as a multi-part test. The behavior then depends on the -# CMake option EIGEN_SPLIT_LARGE_TESTS, which is ON by default. 
-# -# If EIGEN_SPLIT_LARGE_TESTS is OFF, the behavior is the same as in A (the multi-part -# aspect is ignored). -# -# If EIGEN_SPLIT_LARGE_TESTS is ON, the test is split into multiple executables -# test__ -# where N runs from 1 to the greatest occurrence found in the source file. Each of these -# executables is built passing -DEIGEN_TEST_PART_N. This allows to split large tests -# into smaller executables. -# -# Moreover, targets are still generated, they -# have the effect of building all the parts of the test. -# -# Again, ctest -R allows to run all matching tests. -macro(ei_add_test testname) - get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) - set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") - set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - - if(EIGEN_ADD_TEST_FILENAME_EXTENSION) - set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) - else() - set(filename ${testname}.cpp) - endif() - - file(READ "${filename}" test_source) - string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" - occurrences "${test_source}") - string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurrences}") - list(REMOVE_DUPLICATES suffixes) - set(explicit_suffixes "") - if( (NOT EIGEN_SPLIT_LARGE_TESTS) AND suffixes) - # Check whether we have EIGEN_TEST_PART_* statements, in which case we likely must enforce splitting. - # For instance, indexed_view activate a different c++ version for each part. - string(REGEX MATCHALL "EIGEN_TEST_PART_[0-9]+" occurrences "${test_source}") - string(REGEX REPLACE "EIGEN_TEST_PART_" "" explicit_suffixes "${occurrences}") - list(REMOVE_DUPLICATES explicit_suffixes) - endif() - if( (EIGEN_SPLIT_LARGE_TESTS AND suffixes) OR explicit_suffixes) - add_custom_target(${testname}) - foreach(suffix ${suffixes}) - ei_add_test_internal(${testname} ${testname}_${suffix} - "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") - add_dependencies(${testname} ${testname}_${suffix}) - endforeach() - else() - ei_add_test_internal(${testname} ${testname} "${ARGV1} -DEIGEN_TEST_PART_ALL=1" "${ARGV2}") - endif() -endmacro() - -# adds a failtest, i.e. a test that succeed if the program fails to compile -# note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON -# so here we're just running CMake commands immediately, we're not adding any targets. -macro(ei_add_failtest testname) - - set(test_target_ok ${testname}_ok) - set(test_target_ko ${testname}_ko) - - # Add executables - add_executable(${test_target_ok} ${testname}.cpp) - add_executable(${test_target_ko} ${testname}.cpp) - - # Remove them from the normal build process - set_target_properties(${test_target_ok} ${test_target_ko} PROPERTIES - EXCLUDE_FROM_ALL TRUE - EXCLUDE_FROM_DEFAULT_BUILD TRUE) - - # Configure the failing test - target_compile_definitions(${test_target_ko} PRIVATE EIGEN_SHOULD_FAIL_TO_BUILD) - - # Add the tests to ctest. - add_test(NAME ${test_target_ok} - COMMAND ${CMAKE_COMMAND} --build . --target ${test_target_ok} --config $ - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) - add_test(NAME ${test_target_ko} - COMMAND ${CMAKE_COMMAND} --build . 
--target ${test_target_ko} --config $ - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) - - # Expect the second test to fail - set_tests_properties(${test_target_ko} PROPERTIES WILL_FAIL TRUE) -endmacro() - -# print a summary of the different options -macro(ei_testing_print_summary) - message(STATUS "************************************************************") - message(STATUS "*** Eigen's unit tests configuration summary ***") - message(STATUS "************************************************************") - message(STATUS "") - message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") - message(STATUS "Build site: ${SITE}") - message(STATUS "Build string: ${BUILDNAME}") - get_property(EIGEN_TESTING_SUMMARY GLOBAL PROPERTY EIGEN_TESTING_SUMMARY) - get_property(EIGEN_TESTED_BACKENDS GLOBAL PROPERTY EIGEN_TESTED_BACKENDS) - get_property(EIGEN_MISSING_BACKENDS GLOBAL PROPERTY EIGEN_MISSING_BACKENDS) - message(STATUS "Enabled backends: ${EIGEN_TESTED_BACKENDS}") - message(STATUS "Disabled backends: ${EIGEN_MISSING_BACKENDS}") - - if(EIGEN_DEFAULT_TO_ROW_MAJOR) - message(STATUS "Default order: Row-major") - else() - message(STATUS "Default order: Column-major") - endif() - - if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) - message(STATUS "Explicit alignment (hence vectorization) disabled") - elseif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) - message(STATUS "Explicit vectorization disabled (alignment kept enabled)") - else() - - message(STATUS "Maximal matrix/vector size: ${EIGEN_TEST_MAX_SIZE}") - - if(EIGEN_TEST_SSE2) - message(STATUS "SSE2: ON") - else() - message(STATUS "SSE2: Using architecture defaults") - endif() - - if(EIGEN_TEST_SSE3) - message(STATUS "SSE3: ON") - else() - message(STATUS "SSE3: Using architecture defaults") - endif() - - if(EIGEN_TEST_SSSE3) - message(STATUS "SSSE3: ON") - else() - message(STATUS "SSSE3: Using architecture defaults") - endif() - - if(EIGEN_TEST_SSE4_1) - message(STATUS "SSE4.1: ON") - else() - message(STATUS "SSE4.1: Using architecture defaults") - endif() - - if(EIGEN_TEST_SSE4_2) - message(STATUS "SSE4.2: ON") - else() - message(STATUS "SSE4.2: Using architecture defaults") - endif() - - if(EIGEN_TEST_AVX) - message(STATUS "AVX: ON") - else() - message(STATUS "AVX: Using architecture defaults") - endif() - - if(EIGEN_TEST_AVX2) - message(STATUS "AVX2: ON") - else() - message(STATUS "AVX2: Using architecture defaults") - endif() - - if(EIGEN_TEST_FMA) - message(STATUS "FMA: ON") - else() - message(STATUS "FMA: Using architecture defaults") - endif() - - if(EIGEN_TEST_AVX512) - message(STATUS "AVX512: ON") - else() - message(STATUS "AVX512: Using architecture defaults") - endif() - - if(EIGEN_TEST_AVX512DQ) - message(STATUS "AVX512DQ: ON") - else() - message(STATUS "AVX512DQ: Using architecture defaults") - endif() - - if(EIGEN_TEST_ALTIVEC) - message(STATUS "Altivec: ON") - else() - message(STATUS "Altivec: Using architecture defaults") - endif() - - if(EIGEN_TEST_VSX) - message(STATUS "VSX: ON") - else() - message(STATUS "VSX: Using architecture defaults") - endif() - - if(EIGEN_TEST_MSA) - message(STATUS "MIPS MSA: ON") - else() - message(STATUS "MIPS MSA: Using architecture defaults") - endif() - - if(EIGEN_TEST_NEON) - message(STATUS "ARM NEON: ON") - else() - message(STATUS "ARM NEON: Using architecture defaults") - endif() - - if(EIGEN_TEST_NEON64) - message(STATUS "ARMv8 NEON: ON") - else() - message(STATUS "ARMv8 NEON: Using architecture defaults") - endif() - - if(EIGEN_TEST_ZVECTOR) - message(STATUS "S390X ZVECTOR: ON") - else() - message(STATUS "S390X ZVECTOR: Using 
architecture defaults") - endif() - - if(EIGEN_TEST_CXX11) - message(STATUS "C++11: ON") - else() - message(STATUS "C++11: OFF") - endif() - - if(EIGEN_TEST_SYCL) - if(EIGEN_SYCL_TRISYCL) - message(STATUS "SYCL: ON (using triSYCL)") - else() - message(STATUS "SYCL: ON (using computeCPP)") - endif() - else() - message(STATUS "SYCL: OFF") - endif() - if(EIGEN_TEST_CUDA) - if(EIGEN_TEST_CUDA_CLANG) - message(STATUS "CUDA: ON (using clang)") - else() - message(STATUS "CUDA: ON (using nvcc)") - endif() - else() - message(STATUS "CUDA: OFF") - endif() - if(EIGEN_TEST_HIP) - message(STATUS "HIP: ON (using hipcc)") - else() - message(STATUS "HIP: OFF") - endif() - - endif() # vectorization / alignment options - - message(STATUS "\n${EIGEN_TESTING_SUMMARY}") - - message(STATUS "************************************************************") -endmacro() - -macro(ei_init_testing) - define_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_TESTS_LIST BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST BRIEF_DOCS " " FULL_DOCS " ") - - set_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS "") - set_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS "") - set_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY "") - set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "") - set_property(GLOBAL PROPERTY EIGEN_SUBTESTS_LIST "") - - define_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT BRIEF_DOCS " " FULL_DOCS " ") - define_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT BRIEF_DOCS " " FULL_DOCS " ") - - set_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT "0") - set_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT "0") - - # uncomment anytime you change the ei_get_compilerver_from_cxx_version_string macro - # ei_test_get_compilerver_from_cxx_version_string() -endmacro() - -macro(ei_set_sitename) - # if the sitename is not yet set, try to set it - if(NOT ${SITE} OR ${SITE} STREQUAL "") - set(eigen_computername $ENV{COMPUTERNAME}) - set(eigen_hostname $ENV{HOSTNAME}) - if(eigen_hostname) - set(SITE ${eigen_hostname}) - elseif(eigen_computername) - set(SITE ${eigen_computername}) - endif() - endif() - # in case it is already set, enforce lower case - if(SITE) - string(TOLOWER ${SITE} SITE) - endif() -endmacro() - -macro(ei_get_compilerver VAR) - if(MSVC) - # on windows system, we use a modified CMake script - include(EigenDetermineVSServicePack) - EigenDetermineVSServicePack( my_service_pack ) - - if( my_service_pack ) - set(${VAR} ${my_service_pack}) - else() - set(${VAR} "na") - endif() - elseif(${CMAKE_CXX_COMPILER_ID} MATCHES "PGI") - set(${VAR} "${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}") - else() - # on all other system we rely on ${CMAKE_CXX_COMPILER} - # supporting a "--version" or "/version" flag - - if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} EQUAL "Intel") - set(EIGEN_CXX_FLAG_VERSION "/version") - else() - set(EIGEN_CXX_FLAG_VERSION "--version") - endif() - - execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION} - OUTPUT_VARIABLE eigen_cxx_compiler_version_string OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE "^[ \n\r]+" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string}) - string(REGEX REPLACE 
"[\n\r].*" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string}) - - ei_get_compilerver_from_cxx_version_string("${eigen_cxx_compiler_version_string}" CNAME CVER) - set(${VAR} "${CNAME}-${CVER}") - - endif() -endmacro() - -# Extract compiler name and version from a raw version string -# WARNING: if you edit this macro, then please test it by uncommenting -# the testing macro call in ei_init_testing() of the EigenTesting.cmake file. -# See also the ei_test_get_compilerver_from_cxx_version_string macro at the end -# of the file -macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) - # extract possible compiler names - string(REGEX MATCH "g\\+\\+" ei_has_gpp ${VERSTRING}) - string(REGEX MATCH "llvm|LLVM" ei_has_llvm ${VERSTRING}) - string(REGEX MATCH "gcc|GCC" ei_has_gcc ${VERSTRING}) - string(REGEX MATCH "icpc|ICC" ei_has_icpc ${VERSTRING}) - string(REGEX MATCH "clang|CLANG" ei_has_clang ${VERSTRING}) - string(REGEX MATCH "mingw32" ei_has_mingw ${VERSTRING}) - - # combine them - if((ei_has_llvm) AND (ei_has_gpp OR ei_has_gcc)) - set(${CNAME} "llvm-g++") - elseif((ei_has_llvm) AND (ei_has_clang)) - set(${CNAME} "llvm-clang++") - elseif(ei_has_clang) - set(${CNAME} "clang++") - elseif ((ei_has_mingw) AND (ei_has_gpp OR ei_has_gcc)) - set(${CNAME} "mingw32-g++") - elseif(ei_has_icpc) - set(${CNAME} "icpc") - elseif(ei_has_gpp OR ei_has_gcc) - set(${CNAME} "g++") - else() - set(${CNAME} "_") - endif() - - # extract possible version numbers - # first try to extract 3 isolated numbers: - string(REGEX MATCH " [0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING}) - if(NOT eicver) - # try to extract 2 isolated ones: - string(REGEX MATCH " [0-9]+\\.[0-9]+" eicver ${VERSTRING}) - if(NOT eicver) - # try to extract 3: - string(REGEX MATCH "[^0-9][0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING}) - if(NOT eicver) - # try to extract 2: - string(REGEX MATCH "[^0-9][0-9]+\\.[0-9]+" eicver ${VERSTRING}) - if (NOT eicver AND ei_has_mingw) - # try to extract 1 number plus suffix: - string(REGEX MATCH "[^0-9][0-9]+-win32" eicver ${VERSTRING}) - endif() - endif() - endif() - endif() - - if (NOT eicver) - set(eicver " _") - endif() - - string(REGEX REPLACE ".(.*)" "\\1" ${CVER} ${eicver}) - -endmacro() - -macro(ei_get_cxxflags VAR) - set(${VAR} "") - ei_is_64bit_env(IS_64BIT_ENV) - if(EIGEN_TEST_NEON) - set(${VAR} NEON) - elseif(EIGEN_TEST_NEON64) - set(${VAR} NEON) - elseif(EIGEN_TEST_ZVECTOR) - set(${VAR} ZVECTOR) - elseif(EIGEN_TEST_VSX) - set(${VAR} VSX) - elseif(EIGEN_TEST_ALTIVEC) - set(${VAR} ALVEC) - elseif(EIGEN_TEST_FMA) - set(${VAR} FMA) - elseif(EIGEN_TEST_AVX) - set(${VAR} AVX) - elseif(EIGEN_TEST_SSE4_2) - set(${VAR} SSE42) - elseif(EIGEN_TEST_SSE4_1) - set(${VAR} SSE41) - elseif(EIGEN_TEST_SSSE3) - set(${VAR} SSSE3) - elseif(EIGEN_TEST_SSE3) - set(${VAR} SSE3) - elseif(EIGEN_TEST_SSE2 OR IS_64BIT_ENV) - set(${VAR} SSE2) - elseif(EIGEN_TEST_MSA) - set(${VAR} MSA) - endif() - - if(EIGEN_TEST_OPENMP) - if (${VAR} STREQUAL "") - set(${VAR} OMP) - else() - set(${VAR} ${${VAR}}-OMP) - endif() - endif() - - if(EIGEN_DEFAULT_TO_ROW_MAJOR) - if (${VAR} STREQUAL "") - set(${VAR} ROW) - else() - set(${VAR} ${${VAR}}-ROWMAJ) - endif() - endif() -endmacro() - -macro(ei_set_build_string) - ei_get_compilerver(LOCAL_COMPILER_VERSION) - ei_get_cxxflags(LOCAL_COMPILER_FLAGS) - - include(EigenDetermineOSVersion) - DetermineOSVersion(OS_VERSION) - - set(TMP_BUILD_STRING ${OS_VERSION}-${LOCAL_COMPILER_VERSION}) - - if (NOT ${LOCAL_COMPILER_FLAGS} STREQUAL "") - set(TMP_BUILD_STRING 
${TMP_BUILD_STRING}-${LOCAL_COMPILER_FLAGS}) - endif() - - if(EIGEN_TEST_EXTERNAL_BLAS) - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-external_blas) - endif() - - ei_is_64bit_env(IS_64BIT_ENV) - if(NOT IS_64BIT_ENV) - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-32bit) - else() - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) - endif() - - if(EIGEN_TEST_CXX11) - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) - endif() - - if(EIGEN_BUILD_STRING_SUFFIX) - set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) - endif() - - string(TOLOWER ${TMP_BUILD_STRING} BUILDNAME) -endmacro() - -macro(ei_is_64bit_env VAR) - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(${VAR} 1) - elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) - set(${VAR} 0) - else() - message(WARNING "Unsupported pointer size. Please contact the authors.") - endif() -endmacro() - - -# helper macro for testing ei_get_compilerver_from_cxx_version_string -# STR: raw version string -# REFNAME: expected compiler name -# REFVER: expected compiler version -macro(ei_test1_get_compilerver_from_cxx_version_string STR REFNAME REFVER) - ei_get_compilerver_from_cxx_version_string(${STR} CNAME CVER) - if((NOT ${REFNAME} STREQUAL ${CNAME}) OR (NOT ${REFVER} STREQUAL ${CVER})) - message("STATUS ei_get_compilerver_from_cxx_version_string error:") - message("Expected \"${REFNAME}-${REFVER}\", got \"${CNAME}-${CVER}\"") - endif() -endmacro() - -# macro for testing ei_get_compilerver_from_cxx_version_string -# feel free to add more version strings -macro(ei_test_get_compilerver_from_cxx_version_string) - ei_test1_get_compilerver_from_cxx_version_string("g++ (SUSE Linux) 4.5.3 20110428 [gcc-4_5-branch revision 173117]" "g++" "4.5.3") - ei_test1_get_compilerver_from_cxx_version_string("c++ (GCC) 4.5.1 20100924 (Red Hat 4.5.1-4)" "g++" "4.5.1") - ei_test1_get_compilerver_from_cxx_version_string("icpc (ICC) 11.0 20081105" "icpc" "11.0") - ei_test1_get_compilerver_from_cxx_version_string("g++-3.4 (GCC) 3.4.6" "g++" "3.4.6") - ei_test1_get_compilerver_from_cxx_version_string("SUSE Linux clang version 3.0 (branches/release_30 145598) (based on LLVM 3.0)" "llvm-clang++" "3.0") - ei_test1_get_compilerver_from_cxx_version_string("icpc (ICC) 12.0.5 20110719" "icpc" "12.0.5") - ei_test1_get_compilerver_from_cxx_version_string("Apple clang version 2.1 (tags/Apple/clang-163.7.1) (based on LLVM 3.0svn)" "llvm-clang++" "2.1") - ei_test1_get_compilerver_from_cxx_version_string("i686-apple-darwin11-llvm-g++-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)" "llvm-g++" "4.2.1") - ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 4.4.6" "g++" "4.4.6") - ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 2011" "g++" "4.4") - ei_test1_get_compilerver_from_cxx_version_string("x86_64-w64-mingw32-g++ (GCC) 10-win32 20210110" "mingw32-g++" "10-win32") -endmacro() - -# Split all tests listed in EIGEN_TESTS_LIST into num_splits many targets -# named buildtestspartN with N = { 0, ..., num_splits-1}. -# -# The intention behind the existance of this macro is the size of Eigen's -# testsuite. Together with the relativly big compile-times building all tests -# can take a substantial amount of time depending on the available hardware. -# -# The last buildtestspartN target will build possible remaining tests. 
-# -# An example: -# -# EIGEN_TESTS_LIST= [ test1, test2, test3, test4, test5, test6, test7 ] -# -# A call to ei_split_testsuite(3) creates the following targets with dependencies -# -# Target Dependencies -# ------ ------------ -# buildtestspart0 test1, test2 -# buildtestspart1 test3, test4 -# buildtestspart2 test5, test6, test7 -# -macro(ei_split_testsuite num_splits) - get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) - - # Translate EIGEN_TESTS_LIST into a CMake list - string(REGEX REPLACE "\n" " " EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - separate_arguments(EIGEN_TESTS_LIST) - - set(eigen_test_count "0") - foreach(t IN ITEMS ${EIGEN_TESTS_LIST}) - math(EXPR eigen_test_count "${eigen_test_count}+1") - endforeach() - - # Get number of tests per target - math(EXPR num_tests_per_target "${eigen_test_count}/${num_splits} - ${eigen_test_count}/${num_splits} % 1") - - set(test_idx "0") - math(EXPR target_bound "${num_splits}-1") - foreach(part RANGE "0" "${target_bound}") - # Create target - set(current_target "buildtestspart${part}") - add_custom_target("${current_target}") - math(EXPR upper_bound "${test_idx} + ${num_tests_per_target} - 1") - foreach(test_idx RANGE "${test_idx}" "${upper_bound}") - list(GET EIGEN_TESTS_LIST "${test_idx}" curr_test) - add_dependencies("${current_target}" "${curr_test}") - endforeach() - math(EXPR test_idx "${test_idx} + ${num_tests_per_target}") - endforeach() - - # Handle the possibly remaining tests - math(EXPR test_idx "${num_splits} * ${num_tests_per_target}") - math(EXPR target_bound "${eigen_test_count} - 1") - foreach(test_idx RANGE "${test_idx}" "${target_bound}") - list(GET EIGEN_TESTS_LIST "${test_idx}" curr_test) - add_dependencies("${current_target}" "${curr_test}") - endforeach() -endmacro(ei_split_testsuite num_splits) - -# Defines the custom command buildsmoketests to build a number of tests -# specified in smoke_test_list. -# -# Test in smoke_test_list can be either test targets (e.g. packetmath) or -# subtests targets (e.g. packetmath_2). If any of the test are not available -# in the current configuration they are just skipped. -# -# All tests added via this macro are labeled with the smoketest label. This -# allows running smoketests only using ctest. -# -# Smoke tests are intended to be run before the whole test suite is invoked, -# e.g., to smoke test patches. 
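For reference, a minimal sketch of how these two removed helpers were typically driven from the testsuite's CMake code; apart from packetmath, which the comments here already mention, the arguments are hypothetical:

    # Split the registered tests into three build targets
    # buildtestspart0 .. buildtestspart2, as in the example above.
    ei_split_testsuite(3)

    # Create the "buildsmoketests" target depending on the listed tests/subtests;
    # matching ctest entries additionally receive the "smoketest" label.
    ei_add_smoke_tests("packetmath;packetmath_2")

The label then allows running only those tests, e.g. with "ctest -L smoketest".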
-macro(ei_add_smoke_tests smoke_test_list) - # Set the build target to build smoketests - set(buildtarget "buildsmoketests") - add_custom_target("${buildtarget}") - - # Get list of all tests and translate it into a CMake list - get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) - string(REGEX REPLACE "\n" " " EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - separate_arguments(EIGEN_TESTS_LIST) - - # Check if the test in smoke_test_list is a currently valid test target - foreach(test IN ITEMS ${smoke_test_list}) - # Add tests in smoke_test_list to our smoke test target but only if the test - # is currently available, i.e., is in EIGEN_SUBTESTS_LIST - if ("${test}" IN_LIST EIGEN_TESTS_LIST) - add_dependencies("${buildtarget}" "${test}") - # In the case of a test we match all subtests - set(ctest_regex "${ctest_regex}^${test}_[0-9]+$$|") - endif() - endforeach() - - # Get list of all subtests and translate it into a CMake list - get_property(EIGEN_SUBTESTS_LIST GLOBAL PROPERTY EIGEN_SUBTESTS_LIST) - string(REGEX REPLACE "\n" " " EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") - set(EIGEN_SUBTESTS_LIST "${EIGEN_SUBTESTS_LIST}") - separate_arguments(EIGEN_SUBTESTS_LIST) - - # Check if the test in smoke_test_list is a currently valid subtest target - foreach(test IN ITEMS ${smoke_test_list}) - # Add tests in smoke_test_list to our smoke test target but only if the test - # is currently available, i.e., is in EIGEN_SUBTESTS_LIST - if ("${test}" IN_LIST EIGEN_SUBTESTS_LIST) - add_dependencies("${buildtarget}" "${test}") - # Add label smoketest to be able to run smoketests using ctest - get_property(test_labels TEST ${test} PROPERTY LABELS) - set_property(TEST ${test} PROPERTY LABELS "${test_labels};smoketest") - endif() - endforeach() -endmacro(ei_add_smoke_tests) diff --git a/external/eigen-3.4.0/cmake/EigenUninstall.cmake b/external/eigen-3.4.0/cmake/EigenUninstall.cmake deleted file mode 100644 index 5e63c98..0000000 --- a/external/eigen-3.4.0/cmake/EigenUninstall.cmake +++ /dev/null @@ -1,40 +0,0 @@ -################ CMake Uninstall Template ####################### -# CMake Template file for uninstallation of files -# mentioned in 'install_manifest.txt' -# -# Used by uinstall target -################################################################# - -set(MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/install_manifest.txt") - -if(EXISTS ${MANIFEST}) - message(STATUS "============== Uninstalling Eigen ===================") - - file(STRINGS ${MANIFEST} files) - foreach(file ${files}) - if(EXISTS ${file}) - message(STATUS "Removing file: '${file}'") - - execute_process( - COMMAND ${CMAKE_COMMAND} -E remove ${file} - OUTPUT_VARIABLE rm_out - RESULT_VARIABLE rm_retval - ) - - if(NOT "${rm_retval}" STREQUAL 0) - message(FATAL_ERROR "Failed to remove file: '${file}'.") - endif() - else() - message(STATUS "File '${file}' does not exist.") - endif() - endforeach() - - message(STATUS "========== Finished Uninstalling Eigen ==============") -else() - message(STATUS "Cannot find install manifest: '${MANIFEST}'") - message(STATUS "Probably make install has not been performed") - message(STATUS " or install_manifest.txt has been deleted.") -endif() - - - diff --git a/external/eigen-3.4.0/cmake/FindAdolc.cmake b/external/eigen-3.4.0/cmake/FindAdolc.cmake deleted file mode 100644 index 13c59fc..0000000 --- a/external/eigen-3.4.0/cmake/FindAdolc.cmake +++ /dev/null @@ -1,20 +0,0 @@ - -if (ADOLC_INCLUDES AND ADOLC_LIBRARIES) - set(ADOLC_FIND_QUIETLY TRUE) -endif () - 
-find_path(ADOLC_INCLUDES - NAMES adolc/adtl.h - PATHS $ENV{ADOLCDIR} $ENV{ADOLCDIR}/include ${INCLUDE_INSTALL_DIR} -) - -find_library(ADOLC_LIBRARIES - adolc - PATHS $ENV{ADOLCDIR} ${LIB_INSTALL_DIR} - PATH_SUFFIXES lib lib64) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Adolc DEFAULT_MSG - ADOLC_INCLUDES ADOLC_LIBRARIES) - -mark_as_advanced(ADOLC_INCLUDES ADOLC_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindBLAS.cmake b/external/eigen-3.4.0/cmake/FindBLAS.cmake deleted file mode 100644 index 1bb8f19..0000000 --- a/external/eigen-3.4.0/cmake/FindBLAS.cmake +++ /dev/null @@ -1,1407 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2016 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find BLAS library -# This module finds an installed fortran library that implements the BLAS -# linear-algebra interface (see http://www.netlib.org/blas/). -# The list of libraries searched for is taken -# from the autoconf macro file, acx_blas.m4 (distributed at -# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). -# -# This module sets the following variables: -# BLAS_FOUND - set to true if a library implementing the BLAS interface -# is found -# BLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l -# and -L). -# BLAS_COMPILER_FLAGS - uncached list of required compiler flags (including -I for mkl headers). -# BLAS_LIBRARIES - uncached list of libraries (using full path name) to -# link against to use BLAS -# BLAS95_LIBRARIES - uncached list of libraries (using full path name) -# to link against to use BLAS95 interface -# BLAS95_FOUND - set to true if a library implementing the BLAS f95 interface -# is found -# BLA_STATIC if set on this determines what kind of linkage we do (static) -# BLA_VENDOR if set checks only the specified vendor, if not set checks -# all the possibilities -# BLAS_VENDOR_FOUND stores the BLAS vendor found -# BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DBLAS_DIR=path/to/blas): -# BLAS_DIR - Where to find the base directory of blas -# BLAS_INCDIR - Where to find the header files -# BLAS_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR -# For MKL case and if no paths are given as hints, we will try to use the MKLROOT -# environment variable -# BLAS_VERBOSE Print some additional information during BLAS libraries detection -########## -### List of vendors (BLA_VENDOR) valid in this module -########## List of vendors (BLA_VENDOR) valid in this module -## Open (for OpenBlas), Eigen (for EigenBlas), Goto, ATLAS PhiPACK, -##  CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT -## Intel10_32 (intel mkl v10 32 bit), Intel10_64lp (intel mkl v10 64 bit,lp thread model, lp64 model), -## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), -## Intel( older versions of mkl 32 and 64 bit), -##  ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic -# C/CXX should be enabled to use Intel mkl -### -# We handle different modes to find the dependency -# -# - Detection if already installed on the system -# - BLAS 
libraries can be detected from different ways -# Here is the order of precedence: -# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined -# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined -# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) -# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: -# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES -# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES -# - -#============================================================================= -# Copyright 2007-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -## Some macros to print status when search for headers and libs -# This macro informs why the _lib_to_find file has not been found -macro(Print_Find_Library_Blas_Status _libname _lib_to_find) - - # save _libname upper/lower case - string(TOUPPER ${_libname} LIBNAME) - string(TOLOWER ${_libname} libname) - - # print status - #message(" ") - if(${LIBNAME}_LIBDIR) - message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" - "has not been found in ${ARGN}${ColourReset}") - else() - if(${LIBNAME}_DIR) - message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" - "has not been found in ${ARGN}${ColourReset}") - else() - message("${Yellow}${_lib_to_find} not found." - "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" - "are defined so that we look for ${_lib_to_find} in" - "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," - "Mac: DYLD_LIBRARY_PATH," - "CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES," - "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") - if(_lib_env) - message("${Yellow}${_lib_to_find} has not been found in" - "${_lib_env}${ColourReset}") - endif() - endif() - endif() - message("${BoldYellow}Please indicate where to find ${_lib_to_find}. 
You have three options:\n" - "- Option 1: Provide the Installation directory of BLAS library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" - "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") - -endmacro() - -# This macro informs why the _lib_to_find file has not been found -macro(Print_Find_Library_Blas_CheckFunc_Status _name) - - # save _libname upper/lower case - string(TOUPPER ${_name} FUNCNAME) - string(TOLOWER ${_name} funcname) - - # print status - #message(" ") - message("${Red}Libs have been found but check of symbol ${_name} failed " - "with following libraries ${ARGN}${ColourReset}") - message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" - "to figure out why it fails${ColourReset}") - #message(" ") - -endmacro() - -if (NOT BLAS_FOUND) - set(BLAS_DIR "" CACHE PATH "Installation directory of BLAS library") - if (NOT BLAS_FIND_QUIETLY) - message(STATUS "A cache variable, namely BLAS_DIR, has been set to specify the install directory of BLAS") - endif() -endif() - -option(BLAS_VERBOSE "Print some additional information during BLAS libraries detection" OFF) -mark_as_advanced(BLAS_VERBOSE) - -include(CheckFunctionExists) -include(CheckFortranFunctionExists) -include(CMakeFindDependencyMacro) - -set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) - -# Check the language being used -get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) -if( _LANGUAGES_ MATCHES Fortran AND CMAKE_Fortran_COMPILER) - set( _CHECK_FORTRAN TRUE ) -elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) - set( _CHECK_FORTRAN FALSE ) -else() - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") - else() - message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") - return() - endif() -endif() - -macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) - # This macro checks for the existence of the combination of fortran libraries - # given by _list. If the combination is found, this macro checks (using the - # Check_Fortran_Function_Exists macro) whether can link against that library - # combination using the name of a routine given by _name using the linker - # flags given by _flags. If the combination of libraries is found and passes - # the link test, LIBRARIES is set to the list of complete library paths that - # have been found. Otherwise, LIBRARIES is set to FALSE. - - # N.B. _prefix is the prefix applied to the names of all cached variables that - # are generated internally and marked advanced by this macro. 
- - set(_libdir ${ARGN}) - - set(_libraries_work TRUE) - set(${LIBRARIES}) - set(_combined_name) - set(ENV_MKLROOT "$ENV{MKLROOT}") - set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") - set(ENV_BLAS_LIBDIR "$ENV{BLAS_LIBDIR}") - if (NOT _libdir) - if (BLAS_LIBDIR) - list(APPEND _libdir "${BLAS_LIBDIR}") - elseif (BLAS_DIR) - list(APPEND _libdir "${BLAS_DIR}") - list(APPEND _libdir "${BLAS_DIR}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${BLAS_DIR}/lib64") - list(APPEND _libdir "${BLAS_DIR}/lib/intel64") - else() - list(APPEND _libdir "${BLAS_DIR}/lib32") - list(APPEND _libdir "${BLAS_DIR}/lib/ia32") - endif() - elseif(ENV_BLAS_LIBDIR) - list(APPEND _libdir "${ENV_BLAS_LIBDIR}") - elseif(ENV_BLAS_DIR) - list(APPEND _libdir "${ENV_BLAS_DIR}") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib64") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib/intel64") - else() - list(APPEND _libdir "${ENV_BLAS_DIR}/lib32") - list(APPEND _libdir "${ENV_BLAS_DIR}/lib/ia32") - endif() - else() - if (ENV_MKLROOT) - list(APPEND _libdir "${ENV_MKLROOT}/lib") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - list(APPEND _libdir "${ENV_MKLROOT}/lib64") - list(APPEND _libdir "${ENV_MKLROOT}/lib/intel64") - else() - list(APPEND _libdir "${ENV_MKLROOT}/lib32") - list(APPEND _libdir "${ENV_MKLROOT}/lib/ia32") - endif() - endif() - if (WIN32) - string(REPLACE ":" ";" _libdir2 "$ENV{LIB}") - elseif (APPLE) - string(REPLACE ":" ";" _libdir2 "$ENV{DYLD_LIBRARY_PATH}") - else () - string(REPLACE ":" ";" _libdir2 "$ENV{LD_LIBRARY_PATH}") - endif () - list(APPEND _libdir "${_libdir2}") - list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() - endif () - - if (BLAS_VERBOSE) - message("${Cyan}Try to find BLAS libraries: ${_list}") - endif () - - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - - if(_libraries_work) - if (BLA_STATIC) - if (WIN32) - set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif () - if (APPLE) - set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif () - else () - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - # for ubuntu's libblas3gf and liblapack3gf packages - set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) - endif () - endif () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - HINTS ${_libdir} - NO_DEFAULT_PATH - ) - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - # Print status if not found - # ------------------------- - if (NOT ${_prefix}_${_library}_LIBRARY AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) - Print_Find_Library_Blas_Status(blas ${_library} ${_libdir}) - endif () - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) - endif() - endforeach() - - if(_libraries_work) - # Test this combination of libraries. - if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BLA_STATIC) - list(INSERT ${LIBRARIES} 0 "-Wl,--start-group") - list(APPEND ${LIBRARIES} "-Wl,--end-group") - endif() - set(CMAKE_REQUIRED_LIBRARIES "${_flags};${${LIBRARIES}};${_thread}") - set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") - if (BLAS_VERBOSE) - message("${Cyan}BLAS libs found for BLA_VENDOR ${BLA_VENDOR}." 
- "Try to compile symbol ${_name} with following libraries:" - "${CMAKE_REQUIRED_LIBRARIES}") - endif () - if(NOT BLAS_FOUND) - unset(${_prefix}${_combined_name}_WORKS CACHE) - endif() - if (_CHECK_FORTRAN) - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - endif() - check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) - else() - check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) - endif() - mark_as_advanced(${_prefix}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) - # Print status if not found - # ------------------------- - if (NOT _libraries_work AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) - Print_Find_Library_Blas_CheckFunc_Status(${_name} ${CMAKE_REQUIRED_LIBRARIES}) - endif () - set(CMAKE_REQUIRED_LIBRARIES) - endif() - - if(_libraries_work) - set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) - else() - set(${LIBRARIES} FALSE) - endif() - -endmacro() - - -set(BLAS_LINKER_FLAGS) -set(BLAS_LIBRARIES) -set(BLAS95_LIBRARIES) -if ($ENV{BLA_VENDOR} MATCHES ".+") - set(BLA_VENDOR $ENV{BLA_VENDOR}) -else () - if(NOT BLA_VENDOR) - set(BLA_VENDOR "All") - endif() -endif () - -#BLAS in intel mkl 10 library? (em64t 64bit) -if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") - # Looking for include - # ------------------- - - # Add system include paths to search include - # ------------------------------------------ - unset(_inc_env) - set(ENV_MKLROOT "$ENV{MKLROOT}") - set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") - set(ENV_BLAS_INCDIR "$ENV{BLAS_INCDIR}") - if(ENV_BLAS_INCDIR) - list(APPEND _inc_env "${ENV_BLAS_INCDIR}") - elseif(ENV_BLAS_DIR) - list(APPEND _inc_env "${ENV_BLAS_DIR}") - list(APPEND _inc_env "${ENV_BLAS_DIR}/include") - else() - if (ENV_MKLROOT) - list(APPEND _inc_env "${ENV_MKLROOT}/include") - endif() - # system variables - if(WIN32) - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() - endif() - list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") - list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") - list(REMOVE_DUPLICATES _inc_env) - - # set paths where to look for - set(PATH_TO_LOOK_FOR "${_inc_env}") - - # Try to find the fftw header in the given paths - # ------------------------------------------------- - # call cmake macro to find the header path - if(BLAS_INCDIR) - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${BLAS_INCDIR}) - else() - if(BLAS_DIR) - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${BLAS_DIR} - PATH_SUFFIXES "include") - else() - set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_DIRS - NAMES mkl.h - HINTS ${PATH_TO_LOOK_FOR}) - endif() - endif() - mark_as_advanced(BLAS_mkl.h_DIRS) - - # If found, add path to cmake variable - # 
------------------------------------ - if (BLAS_mkl.h_DIRS) - set(BLAS_INCLUDE_DIRS "${BLAS_mkl.h_DIRS}") - else () - set(BLAS_INCLUDE_DIRS "BLAS_INCLUDE_DIRS-NOTFOUND") - if(NOT BLAS_FIND_QUIETLY) - message(STATUS "Looking for BLAS -- mkl.h not found") - endif() - endif() - - if (WIN32) - string(REPLACE ":" ";" _libdir "$ENV{LIB}") - elseif (APPLE) - string(REPLACE ":" ";" _libdir "$ENV{DYLD_LIBRARY_PATH}") - else () - string(REPLACE ":" ";" _libdir "$ENV{LD_LIBRARY_PATH}") - endif () - list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - # libiomp5 - # -------- - set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") - find_library(OMP_iomp5_LIBRARY - NAMES iomp5 - HINTS ${_libdir} - ) - mark_as_advanced(OMP_iomp5_LIBRARY) - set(OMP_LIB "") - # libgomp - # ------- - set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") - find_library(OMP_gomp_LIBRARY - NAMES gomp - HINTS ${_libdir} - ) - mark_as_advanced(OMP_gomp_LIBRARY) - # choose one or another depending on the compilo - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - if (OMP_gomp_LIBRARY) - set(OMP_LIB "${OMP_gomp_LIBRARY}") - endif() - else() - if (OMP_iomp5_LIBRARY) - set(OMP_LIB "${OMP_iomp5_LIBRARY}") - endif() - endif() - - if (UNIX AND NOT WIN32) - # m - find_library(M_LIBRARY - NAMES m - HINTS ${_libdir}) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - set(LM "-lm") - else() - set(LM "") - endif() - # Fortran - set(LGFORTRAN "") - if (CMAKE_C_COMPILER_ID MATCHES "GNU") - find_library( - FORTRAN_gfortran_LIBRARY - NAMES gfortran - HINTS ${_libdir} - ) - mark_as_advanced(FORTRAN_gfortran_LIBRARY) - if (FORTRAN_gfortran_LIBRARY) - set(LGFORTRAN "${FORTRAN_gfortran_LIBRARY}") - endif() - elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") - find_library( - FORTRAN_ifcore_LIBRARY - NAMES ifcore - HINTS ${_libdir} - ) - mark_as_advanced(FORTRAN_ifcore_LIBRARY) - if (FORTRAN_ifcore_LIBRARY) - set(LGFORTRAN "{FORTRAN_ifcore_LIBRARY}") - endif() - endif() - set(BLAS_COMPILER_FLAGS "") - if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_COMPILER_FLAGS "-openmp") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") - endif() - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - if (BLA_VENDOR STREQUAL "Intel10_32") - list(APPEND BLAS_COMPILER_FLAGS "-m32") - else() - list(APPEND BLAS_COMPILER_FLAGS "-m64") - endif() - if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") - list(APPEND OMP_LIB "-ldl") - endif() - if (ENV_MKLROOT) - list(APPEND BLAS_COMPILER_FLAGS "-I${ENV_MKLROOT}/include") - endif() - endif() - - set(additional_flags "") - if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(additional_flags "-Wl,--no-as-needed") - endif() - endif () - - if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) - if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) - find_dependency(Threads) - else() - find_dependency(Threads REQUIRED) - endif() - - set(BLAS_SEARCH_LIBS "") - - if(BLA_F95) - - set(BLAS_mkl_SEARCH_SYMBOL SGEMM) - set(_LIBRARIES BLAS95_LIBRARIES) - if (WIN32) - if (BLA_STATIC) - set(BLAS_mkl_DLL_SUFFIX "") - else() - set(BLAS_mkl_DLL_SUFFIX "_dll") - endif() - - # Find the main file (32-bit or 64-bit) - set(BLAS_SEARCH_LIBS_WIN_MAIN "") - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR 
STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_blas95_lp64${BLAS_mkl_DLL_SUFFIX} mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") - endif () - - # Add threading/sequential libs - set(BLAS_SEARCH_LIBS_WIN_THREAD "") - if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") - endif() - if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - # mkl >= 10.3 - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - endif() - - # Cartesian product of the above - foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) - foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) - list(APPEND BLAS_SEARCH_LIBS - "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") - endforeach() - endforeach() - else () - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95 mkl_intel mkl_intel_thread mkl_core guide") - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") - # mkl >= 10.3 - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") - endif() - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_sequential mkl_core") - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") - set(OMP_LIB "") - endif() - endif () - endif () - - else () - - set(BLAS_mkl_SEARCH_SYMBOL sgemm) - set(_LIBRARIES BLAS_LIBRARIES) - if (WIN32) - if (BLA_STATIC) - set(BLAS_mkl_DLL_SUFFIX "") - else() - set(BLAS_mkl_DLL_SUFFIX "_dll") - endif() - - # Find the main file (32-bit or 64-bit) - set(BLAS_SEARCH_LIBS_WIN_MAIN "") - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN - "mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") - endif () - - # Add threading/sequential libs - set(BLAS_SEARCH_LIBS_WIN_THREAD "") - if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - # mkl >= 10.3 - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") - endif() - if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD - "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") - endif() - - # Cartesian product of the above - foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) - foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) - list(APPEND BLAS_SEARCH_LIBS - "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") - endforeach() - endforeach() - else () - if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel mkl_intel_thread mkl_core guide") - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") - # old version - list(APPEND 
BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_intel_thread mkl_core guide") - # mkl >= 10.3 - if (CMAKE_C_COMPILER_ID STREQUAL "Intel") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_intel_thread mkl_core") - endif() - if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_gnu_thread mkl_core") - endif() - endif () - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_sequential mkl_core") - if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") - set(OMP_LIB "") - endif() - endif () - #older vesions of intel mkl libs - if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") - list(APPEND BLAS_SEARCH_LIBS - "mkl") - list(APPEND BLAS_SEARCH_LIBS - "mkl_ia32") - list(APPEND BLAS_SEARCH_LIBS - "mkl_em64t") - endif () - endif () - - endif () - - foreach (IT ${BLAS_SEARCH_LIBS}) - string(REPLACE " " ";" SEARCH_LIBS ${IT}) - if (${_LIBRARIES}) - else () - check_fortran_libraries( - ${_LIBRARIES} - BLAS - ${BLAS_mkl_SEARCH_SYMBOL} - "${additional_flags}" - "${SEARCH_LIBS}" - "${OMP_LIB};${CMAKE_THREAD_LIBS_INIT};${LM}" - ) - if(_LIBRARIES) - set(BLAS_LINKER_FLAGS "${additional_flags}") - endif() - endif() - endforeach () - if(NOT BLAS_FIND_QUIETLY) - if(${_LIBRARIES}) - message(STATUS "Looking for MKL BLAS: found") - else() - message(STATUS "Looking for MKL BLAS: not found") - endif() - endif() - if (${_LIBRARIES} AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Intel MKL") - endif() - endif () - endif() -endif () - - -if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "goto2" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Goto BLAS: found") - else() - message(STATUS "Looking for Goto BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Goto") - endif() - -endif () - - -# OpenBlas -if (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # openblas (http://www.openblas.net/) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "openblas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Open BLAS: found") - else() - message(STATUS "Looking for Open BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Openblas") - endif() - -endif () - - -# EigenBlas -if (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "eigen_blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - message(STATUS "Looking for Eigen BLAS: found") - else() - message(STATUS "Looking for Eigen BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "eigen_blas_static" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Eigen BLAS: found") - else() - message(STATUS "Looking for Eigen BLAS: not found") - endif() - endif() - endif() - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Eigen") - 
endif() - -endif () - - -if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "f77blas;atlas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Atlas BLAS: found") - else() - message(STATUS "Looking for Atlas BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Atlas") - endif() - -endif () - - -# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "sgemm;dgemm;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for PhiPACK BLAS: found") - else() - message(STATUS "Looking for PhiPACK BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "PhiPACK") - endif() - -endif () - - -# BLAS in Alpha CXML library? -if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "cxml" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for CXML BLAS: found") - else() - message(STATUS "Looking for CXML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "CXML") - endif() - -endif () - - -# BLAS in Alpha DXML library? (now called CXML, see above) -if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "dxml" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for DXML BLAS: found") - else() - message(STATUS "Looking for DXML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "DXML") - endif() - -endif () - - -# BLAS in Sun Performance library? -if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "-xlic_lib=sunperf" - "sunperf;sunmath" - "" - ) - if(BLAS_LIBRARIES) - set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") - endif() - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SunPerf BLAS: found") - else() - message(STATUS "Looking for SunPerf BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SunPerf") - endif() - -endif () - - -# BLAS in SCSL library? (SGI/Cray Scientific Library) -if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "scsl" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SCSL BLAS: found") - else() - message(STATUS "Looking for SCSL BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SunPerf") - endif() - -endif () - - -# BLAS in SGIMATH library? 
-if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "complib.sgimath" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for SGIMATH BLAS: found") - else() - message(STATUS "Looking for SGIMATH BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "SGIMATH") - endif() - -endif () - - -# BLAS in IBM ESSL library (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "essl;xlfmath;xlf90_r;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for IBM ESSL BLAS: found") - else() - message(STATUS "Looking for IBM ESSL BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "IBM ESSL") - endif() - -endif () - -# BLAS in IBM ESSL_MT library (requires generic BLAS lib, too) -if (BLA_VENDOR STREQUAL "IBMESSLMT" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "esslsmp;xlsmp;xlfmath;xlf90_r;blas" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for IBM ESSL MT BLAS: found") - else() - message(STATUS "Looking for IBM ESSL MT BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "IBM ESSL MT") - endif() - -endif () - - -#BLAS in acml library? -if (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") - - if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR - ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR - ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))) - - # try to find acml in "standard" paths - if( WIN32 ) - file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" ) - else() - file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" ) - endif() - if( WIN32 ) - file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" ) - else() - file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" ) - endif() - list(GET _ACML_ROOT 0 _ACML_ROOT) - list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) - - if( _ACML_ROOT ) - - get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH ) - if( SIZEOF_INTEGER EQUAL 8 ) - set( _ACML_PATH_SUFFIX "_int64" ) - else() - set( _ACML_PATH_SUFFIX "" ) - endif() - if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) - set( _ACML_COMPILER32 "ifort32" ) - set( _ACML_COMPILER64 "ifort64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" ) - set( _ACML_COMPILER32 "sun32" ) - set( _ACML_COMPILER64 "sun64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" ) - set( _ACML_COMPILER32 "pgi32" ) - if( WIN32 ) - set( _ACML_COMPILER64 "win64" ) - else() - set( _ACML_COMPILER64 "pgi64" ) - endif() - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" ) - # 32 bit builds not supported on Open64 but for code simplicity - # We'll just use the same directory twice - set( _ACML_COMPILER32 "open64_64" ) - set( _ACML_COMPILER64 "open64_64" ) - elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" ) - set( _ACML_COMPILER32 "nag32" ) - set( _ACML_COMPILER64 "nag64" ) - else() - set( _ACML_COMPILER32 "gfortran32" ) - set( _ACML_COMPILER64 "gfortran64" ) - endif() - - if( BLA_VENDOR STREQUAL "ACML_MP" ) - set(_ACML_MP_LIB_DIRS - 
"${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" - "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" ) - else() - set(_ACML_LIB_DIRS - "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" - "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" ) - endif() - - endif() - - elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) - - set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) - - endif() - - if( BLA_VENDOR STREQUAL "ACML_MP" ) - foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - elseif( BLA_VENDOR STREQUAL "ACML_GPU" ) - foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - else() - foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} ) - check_fortran_libraries ( - BLAS_LIBRARIES - BLAS - sgemm - "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} - ) - if( BLAS_LIBRARIES ) - break() - endif() - endforeach() - endif() - - # Either acml or acml_mp should be in LD_LIBRARY_PATH but not both - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml;acml_mv" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml_mp;acml_mv" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml;acml_mv;CALBLAS" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for ACML BLAS: found") - else() - message(STATUS "Looking for ACML BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "ACML") - endif() - -endif () # ACML - - -# Apple BLAS library? -if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") - - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "Accelerate" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Apple BLAS: found") - else() - message(STATUS "Looking for Apple BLAS: not found") - endif() - endif() - endif() - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Apple Accelerate") - endif() - -endif () - - -if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") - - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - dgemm - "" - "vecLib" - "" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for NAS BLAS: found") - else() - message(STATUS "Looking for NAS BLAS: not found") - endif() - endif() - endif () - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "NAS") - endif() - -endif () - - -# Generic BLAS library? 
-if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") - - set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") - foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) - if (BLAS_LIBRARIES) - else () - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "${SEARCH_LIB}" - "${LGFORTRAN}" - ) - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_LIBRARIES) - message(STATUS "Looking for Generic BLAS: found") - else() - message(STATUS "Looking for Generic BLAS: not found") - endif() - endif() - endif() - endforeach () - - if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) - set (BLAS_VENDOR_FOUND "Netlib or other Generic libblas") - endif() - -endif () - - -if(BLA_F95) - - if(BLAS95_LIBRARIES) - set(BLAS95_FOUND TRUE) - else() - set(BLAS95_FOUND FALSE) - endif() - - if(NOT BLAS_FIND_QUIETLY) - if(BLAS95_FOUND) - message(STATUS "A library with BLAS95 API found.") - message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") - else() - message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas 95 libraries could not be found or check of symbols failed." - "\nPlease indicate where to find blas libraries. You have three options:\n" - "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" - "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." - "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." - "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," - "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," - "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR - "A required library with BLAS95 API not found. Please specify library location.") - else() - message(STATUS - "A library with BLAS95 API not found. Please specify library location.") - endif() - endif() - endif() - - set(BLAS_FOUND TRUE) - set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") - -else() - - if(BLAS_LIBRARIES) - set(BLAS_FOUND TRUE) - else() - set(BLAS_FOUND FALSE) - endif() - - if(NOT BLAS_FIND_QUIETLY) - if(BLAS_FOUND) - message(STATUS "A library with BLAS API found.") - message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") - else() - message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas libraries could not be found or check of symbols failed." - "\nPlease indicate where to find blas libraries. You have three options:\n" - "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" - "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" - "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" - "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." - "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." 
- "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," - "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," - "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") - if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR - "A required library with BLAS API not found. Please specify library location.") - else() - message(STATUS - "A library with BLAS API not found. Please specify library location.") - endif() - endif() - endif() - -endif() - -set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) - -if (BLAS_FOUND) - list(GET BLAS_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") - string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") - set(BLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of BLAS library" FORCE) - else() - set(BLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of BLAS library" FORCE) - endif() -endif() -mark_as_advanced(BLAS_DIR) -mark_as_advanced(BLAS_DIR_FOUND) diff --git a/external/eigen-3.4.0/cmake/FindBLASEXT.cmake b/external/eigen-3.4.0/cmake/FindBLASEXT.cmake deleted file mode 100644 index 69a9418..0000000 --- a/external/eigen-3.4.0/cmake/FindBLASEXT.cmake +++ /dev/null @@ -1,384 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2016 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find BLAS EXTENDED for MORSE projects: find include dirs and libraries -# -# This module allows to find BLAS libraries by calling the official FindBLAS module -# and handles the creation of different library lists whether the user wishes to link -# with a sequential BLAS or a multihreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). -# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML -# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries. -# -# The following variables have been added to manage links with sequential or multithreaded -# versions: -# BLAS_INCLUDE_DIRS - BLAS include directories -# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries -# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) -# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013-2016 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) 
- -# macro to factorize this call -include(CMakeFindDependencyMacro) -macro(find_package_blas) - if(BLASEXT_FIND_REQUIRED) - if(BLASEXT_FIND_QUIETLY) - find_dependency(BLAS REQUIRED QUIET) - else() - find_dependency(BLAS REQUIRED) - endif() - else() - if(BLASEXT_FIND_QUIETLY) - find_dependency(BLAS QUIET) - else() - find_dependency(BLAS) - endif() - endif() -endmacro() - -# add a cache variable to let the user specify the BLAS vendor -set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor: - Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, - Intel10_32 (intel mkl v10 32 bit), - Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), - Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model), - Intel( older versions of mkl 32 and 64 bit), - ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") - -if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "In FindBLASEXT") - message(STATUS "If you want to force the use of one specific library, " - "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name" - "\n at cmake configure.") - message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, " - "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit)," - "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model)," - "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," - "\n Intel( older versions of mkl 32 and 64 bit)," - "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") -endif() - -if (NOT BLAS_FOUND) - # First try to detect two cases: - # 1: only SEQ libs are handled - # 2: both SEQ and PAR libs are handled - find_package_blas() -endif () - -# detect the cases where SEQ and PAR libs are handled -if(BLA_VENDOR STREQUAL "All" AND - (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY) - ) - set(BLA_VENDOR "Intel") - if(BLAS_mkl_intel_LIBRARY) - set(BLA_VENDOR "Intel10_32") - endif() - if(BLAS_mkl_intel_lp64_LIBRARY) - set(BLA_VENDOR "Intel10_64lp") - endif() - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" - "\n have also potentially detected some multithreaded BLAS libraries from the MKL." - "\n We try to find both libraries lists (Sequential/Multithreaded).") - endif() - set(BLAS_FOUND "") -elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) - set(BLA_VENDOR "ACML") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" - "\n have also potentially detected some multithreaded BLAS libraries from the ACML." - "\n We try to find both libraries lists (Sequential/Multithreaded).") - endif() - set(BLAS_FOUND "") -elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) - set(BLA_VENDOR "IBMESSL") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" - "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." 
- "\n We try to find both libraries lists (Sequential/Multithreaded).") - endif() - set(BLAS_FOUND "") -endif() - -# Intel case -if(BLA_VENDOR MATCHES "Intel*") - - ### - # look for include path if the BLAS vendor is Intel - ### - - # gather system include paths - unset(_inc_env) - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() - list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") - list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") - set(ENV_MKLROOT "$ENV{MKLROOT}") - if (ENV_MKLROOT) - list(APPEND _inc_env "${ENV_MKLROOT}/include") - endif() - list(REMOVE_DUPLICATES _inc_env) - - # find mkl.h inside known include paths - set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") - if(BLAS_INCDIR) - set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_INCLUDE_DIRS - NAMES mkl.h - HINTS ${BLAS_INCDIR}) - else() - if(BLAS_DIR) - set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_INCLUDE_DIRS - NAMES mkl.h - HINTS ${BLAS_DIR} - PATH_SUFFIXES include) - else() - set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") - find_path(BLAS_mkl.h_INCLUDE_DIRS - NAMES mkl.h - HINTS ${_inc_env}) - endif() - endif() - mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) - ## Print status if not found - ## ------------------------- - #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND MORSE_VERBOSE) - # Print_Find_Header_Status(blas mkl.h) - #endif () - set(BLAS_INCLUDE_DIRS "") - if(BLAS_mkl.h_INCLUDE_DIRS) - list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) - endif() - - ### - # look for libs - ### - # if Intel 10 64 bit -> look for sequential and multithreaded versions - if(BLA_VENDOR MATCHES "Intel10_64lp*") - - ## look for the sequential version - set(BLA_VENDOR "Intel10_64lp_seq") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "Look for the sequential version Intel10_64lp_seq") - endif() - find_package_blas() - if(BLAS_FOUND) - set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") - endif() - - ## look for the multithreaded version - set(BLA_VENDOR "Intel10_64lp") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "Look for the multithreaded version Intel10_64lp") - endif() - find_package_blas() - if(BLAS_FOUND) - set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") - endif() - - else() - - if(BLAS_FOUND) - set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") - endif() - - endif() - - # ACML case -elseif(BLA_VENDOR MATCHES "ACML*") - - ## look for the sequential version - set(BLA_VENDOR "ACML") - find_package_blas() - if(BLAS_FOUND) - set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") - endif() - - ## look for the multithreaded version - set(BLA_VENDOR "ACML_MP") - find_package_blas() - if(BLAS_FOUND) - set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") - endif() - - # IBMESSL case -elseif(BLA_VENDOR MATCHES 
"IBMESSL*") - - ## look for the sequential version - set(BLA_VENDOR "IBMESSL") - find_package_blas() - if(BLAS_FOUND) - set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") - endif() - - ## look for the multithreaded version - set(BLA_VENDOR "IBMESSLMT") - find_package_blas() - if(BLAS_FOUND) - set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") - endif() - -else() - - if(BLAS_FOUND) - # define the SEQ libs as the BLAS_LIBRARIES - set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") - else() - set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") - endif() - set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") - -endif() - - -if(BLAS_SEQ_LIBRARIES) - set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") -endif() - -# extract libs paths -# remark: because it is not given by find_package(BLAS) -set(BLAS_LIBRARY_DIRS "") -string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") -foreach(blas_lib ${BLAS_LIBRARIES}) - if (EXISTS "${blas_lib}") - get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) - list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) - else() - string(REPLACE "-L" "" blas_lib "${blas_lib}") - if (EXISTS "${blas_lib}") - list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) - else() - get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) - if (EXISTS "${a_blas_lib_dir}") - list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) - endif() - endif() - endif() -endforeach() -if (BLAS_LIBRARY_DIRS) - list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) -endif () - -# check that BLAS has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -if(BLA_VENDOR MATCHES "Intel*") - if(BLA_VENDOR MATCHES "Intel10_64lp*") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS found is Intel MKL:" - "\n we manage two lists of libs, one sequential and one parallel if found" - "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") - message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_SEQ_LIBRARIES - BLAS_LIBRARY_DIRS - BLAS_INCLUDE_DIRS) - if(BLAS_PAR_LIBRARIES) - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_PAR_LIBRARIES) - endif() - else() - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_SEQ_LIBRARIES - BLAS_LIBRARY_DIRS - BLAS_INCLUDE_DIRS) - endif() -elseif(BLA_VENDOR MATCHES "ACML*") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS found is ACML:" - "\n we manage two lists of libs, one sequential and one parallel if found" - "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") - message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_SEQ_LIBRARIES - BLAS_LIBRARY_DIRS) - if(BLAS_PAR_LIBRARIES) - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_PAR_LIBRARIES) - endif() -elseif(BLA_VENDOR MATCHES "IBMESSL*") - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS found is ESSL:" - "\n we manage two lists of libs, one sequential and one parallel if found" - "\n (see BLAS_SEQ_LIBRARIES and 
BLAS_PAR_LIBRARIES)") - message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_SEQ_LIBRARIES - BLAS_LIBRARY_DIRS) - if(BLAS_PAR_LIBRARIES) - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_PAR_LIBRARIES) - endif() -else() - if(NOT BLASEXT_FIND_QUIETLY) - message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") - endif() - find_package_handle_standard_args(BLASEXT DEFAULT_MSG - BLAS_SEQ_LIBRARIES - BLAS_LIBRARY_DIRS) -endif() - -# Callers expect BLAS_FOUND to be set as well. -set(BLAS_FOUND BLASEXT_FOUND) diff --git a/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake b/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake deleted file mode 100644 index e470cb2..0000000 --- a/external/eigen-3.4.0/cmake/FindCHOLMOD.cmake +++ /dev/null @@ -1,89 +0,0 @@ -# CHOLMOD lib usually requires linking to a blas and lapack library. -# It is up to the user of this module to find a BLAS and link to it. - -if (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) - set(CHOLMOD_FIND_QUIETLY TRUE) -endif () - -find_path(CHOLMOD_INCLUDES - NAMES - cholmod.h - PATHS - $ENV{CHOLMODDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - suitesparse - ufsparse -) - -find_library(CHOLMOD_LIBRARIES cholmod PATHS $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - -if(CHOLMOD_LIBRARIES) - - get_filename_component(CHOLMOD_LIBDIR ${CHOLMOD_LIBRARIES} PATH) - - find_library(AMD_LIBRARY amd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (AMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${AMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif() - -if(CHOLMOD_LIBRARIES) - - find_library(COLAMD_LIBRARY colamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (COLAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${COLAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif() - -if(CHOLMOD_LIBRARIES) - - find_library(CAMD_LIBRARY camd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (CAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif() - -if(CHOLMOD_LIBRARIES) - - find_library(CCOLAMD_LIBRARY ccolamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (CCOLAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CCOLAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif() - -if(CHOLMOD_LIBRARIES) - - find_library(CHOLMOD_METIS_LIBRARY metis PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (CHOLMOD_METIS_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CHOLMOD_METIS_LIBRARY}) - endif () - -endif() - -if(CHOLMOD_LIBRARIES) - - find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMODDIR} ${LIB_INSTALL_DIR}) - if (SUITESPARSE_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif () - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CHOLMOD DEFAULT_MSG - CHOLMOD_INCLUDES CHOLMOD_LIBRARIES) - -mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/FindComputeCpp.cmake b/external/eigen-3.4.0/cmake/FindComputeCpp.cmake deleted file mode 100644 index 1c271f0..0000000 --- 
a/external/eigen-3.4.0/cmake/FindComputeCpp.cmake +++ /dev/null @@ -1,455 +0,0 @@ -#.rst: -# FindComputeCpp -#--------------- -# -# Copyright 2016-2018 Codeplay Software Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use these files except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -######################### -# FindComputeCpp.cmake -######################### -# -# Tools for finding and building with ComputeCpp. -# -# User must define ComputeCpp_DIR pointing to the ComputeCpp -# installation. -# -# Latest version of this file can be found at: -# https://github.com/codeplaysoftware/computecpp-sdk - -cmake_minimum_required(VERSION 3.4.3) -include(FindPackageHandleStandardArgs) -include(ComputeCppIRMap) - -set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++") -separate_arguments(COMPUTECPP_USER_FLAGS) -mark_as_advanced(COMPUTECPP_USER_FLAGS) - -set(COMPUTECPP_BITCODE "spir64" CACHE STRING - "Bitcode type to use as SYCL target in compute++") -mark_as_advanced(COMPUTECPP_BITCODE) - -include(CMakeFindDependencyMacro) -find_dependency(OpenCL REQUIRED) - -# Find ComputeCpp package - -if(DEFINED ComputeCpp_DIR) - set(computecpp_find_hint ${ComputeCpp_DIR}) -elseif(DEFINED ENV{COMPUTECPP_DIR}) - set(computecpp_find_hint $ENV{COMPUTECPP_DIR}) -endif() - -# Used for running executables on the host -set(computecpp_host_find_hint ${computecpp_find_hint}) - -if(CMAKE_CROSSCOMPILING) - # ComputeCpp_HOST_DIR is used to find executables that are run on the host - if(DEFINED ComputeCpp_HOST_DIR) - set(computecpp_host_find_hint ${ComputeCpp_HOST_DIR}) - elseif(DEFINED ENV{COMPUTECPP_HOST_DIR}) - set(computecpp_host_find_hint $ENV{COMPUTECPP_HOST_DIR}) - endif() -endif() - -find_program(ComputeCpp_DEVICE_COMPILER_EXECUTABLE compute++ - HINTS ${computecpp_host_find_hint} - PATH_SUFFIXES bin - NO_SYSTEM_ENVIRONMENT_PATH) - -find_program(ComputeCpp_INFO_EXECUTABLE computecpp_info - HINTS ${computecpp_host_find_hint} - PATH_SUFFIXES bin - NO_SYSTEM_ENVIRONMENT_PATH) - -find_library(COMPUTECPP_RUNTIME_LIBRARY - NAMES ComputeCpp ComputeCpp_vs2015 - HINTS ${computecpp_find_hint} - PATH_SUFFIXES lib - DOC "ComputeCpp Runtime Library") - -find_library(COMPUTECPP_RUNTIME_LIBRARY_DEBUG - NAMES ComputeCpp_d ComputeCpp ComputeCpp_vs2015_d - HINTS ${computecpp_find_hint} - PATH_SUFFIXES lib - DOC "ComputeCpp Debug Runtime Library") - -find_path(ComputeCpp_INCLUDE_DIRS - NAMES "CL/sycl.hpp" - HINTS ${computecpp_find_hint}/include - DOC "The ComputeCpp include directory") -get_filename_component(ComputeCpp_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS} ABSOLUTE) - -get_filename_component(computecpp_canonical_root_dir "${ComputeCpp_INCLUDE_DIRS}/.." 
ABSOLUTE) -set(ComputeCpp_ROOT_DIR "${computecpp_canonical_root_dir}" CACHE PATH - "The root of the ComputeCpp install") - -if(NOT ComputeCpp_INFO_EXECUTABLE) - message(WARNING "Can't find computecpp_info - check ComputeCpp_DIR") -else() - execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-version" - OUTPUT_VARIABLE ComputeCpp_VERSION - RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") - message(WARNING "Package version - Error obtaining version!") - endif() - - execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-is-supported" - OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED - RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") - message(WARNING "platform - Error checking platform support!") - else() - mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) - if (COMPUTECPP_PLATFORM_IS_SUPPORTED) - message(STATUS "platform - your system can support ComputeCpp") - else() - message(STATUS "platform - your system is not officially supported") - endif() - endif() -endif() - -find_package_handle_standard_args(ComputeCpp - REQUIRED_VARS ComputeCpp_ROOT_DIR - ComputeCpp_DEVICE_COMPILER_EXECUTABLE - ComputeCpp_INFO_EXECUTABLE - COMPUTECPP_RUNTIME_LIBRARY - COMPUTECPP_RUNTIME_LIBRARY_DEBUG - ComputeCpp_INCLUDE_DIRS - VERSION_VAR ComputeCpp_VERSION) -mark_as_advanced(ComputeCpp_ROOT_DIR - ComputeCpp_DEVICE_COMPILER_EXECUTABLE - ComputeCpp_INFO_EXECUTABLE - COMPUTECPP_RUNTIME_LIBRARY - COMPUTECPP_RUNTIME_LIBRARY_DEBUG - ComputeCpp_INCLUDE_DIRS - ComputeCpp_VERSION) - -if(NOT ComputeCpp_FOUND) - return() -endif() - -list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -O2 -mllvm -inline-threshold=1000 -intelspirmetadata) -mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS) - -if(CMAKE_CROSSCOMPILING) - if(NOT COMPUTECPP_DONT_USE_TOOLCHAIN) - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --gcc-toolchain=${COMPUTECPP_TOOLCHAIN_DIR}) - endif() - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --sysroot=${COMPUTECPP_SYSROOT_DIR}) - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -target ${COMPUTECPP_TARGET_TRIPLE}) -endif() - -list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -sycl-target ${COMPUTECPP_BITCODE}) -message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") - -include(ComputeCppCompilerChecks) - -if(NOT TARGET OpenCL::OpenCL) - add_library(OpenCL::OpenCL UNKNOWN IMPORTED) - set_target_properties(OpenCL::OpenCL PROPERTIES - IMPORTED_LOCATION "${OpenCL_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}" - ) -endif() - -if(NOT TARGET ComputeCpp::ComputeCpp) - add_library(ComputeCpp::ComputeCpp UNKNOWN IMPORTED) - set_target_properties(ComputeCpp::ComputeCpp PROPERTIES - IMPORTED_LOCATION_DEBUG "${COMPUTECPP_RUNTIME_LIBRARY_DEBUG}" - IMPORTED_LOCATION_RELWITHDEBINFO "${COMPUTECPP_RUNTIME_LIBRARY}" - IMPORTED_LOCATION "${COMPUTECPP_RUNTIME_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${ComputeCpp_INCLUDE_DIRS}" - INTERFACE_LINK_LIBRARIES "OpenCL::OpenCL" - ) -endif() - -# This property allows targets to specify that their sources should be -# compiled with the integration header included after the user's -# sources, not before (e.g. 
when an enum is used in a kernel name, this -# is not technically valid SYCL code but can work with ComputeCpp) -define_property( - TARGET PROPERTY COMPUTECPP_INCLUDE_AFTER - BRIEF_DOCS "Include integration header after user source" - FULL_DOCS "Changes compiler arguments such that the source file is - actually the integration header, and the .cpp file is included on - the command line so that it is seen by the compiler first. Enables - non-standards-conformant SYCL code to compile with ComputeCpp." -) -define_property( - TARGET PROPERTY INTERFACE_COMPUTECPP_FLAGS - BRIEF_DOCS "Interface compile flags to provide compute++" - FULL_DOCS "Set additional compile flags to pass to compute++ when compiling - any target which links to this one." -) -define_property( - SOURCE PROPERTY COMPUTECPP_SOURCE_FLAGS - BRIEF_DOCS "Source file compile flags for compute++" - FULL_DOCS "Set additional compile flags for compiling the SYCL integration - header for the given source file." -) - -#################### -# __build_ir -#################### -# -# Adds a custom target for running compute++ and adding a dependency for the -# resulting integration header and kernel binary. -# -# TARGET : Name of the target. -# SOURCE : Source file to be compiled. -# COUNTER : Counter included in name of custom target. Different counter -# values prevent duplicated names of custom target when source files with -# the same name, but located in different directories, are used for the -# same target. -# -function(__build_ir) - set(options) - set(one_value_args - TARGET - SOURCE - COUNTER - ) - set(multi_value_args) - cmake_parse_arguments(SDK_BUILD_IR - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN} - ) - get_filename_component(sourceFileName ${SDK_BUILD_IR_SOURCE} NAME) - - # Set the path to the integration header. - # The .sycl filename must depend on the target so that different targets - # using the same source file will be generated with a different rule. 
- set(baseSyclName ${CMAKE_CURRENT_BINARY_DIR}/${SDK_BUILD_IR_TARGET}_${sourceFileName}) - set(outputSyclFile ${baseSyclName}.sycl) - set(outputDeviceFile ${baseSyclName}.${IR_MAP_${COMPUTECPP_BITCODE}}) - set(depFileName ${baseSyclName}.sycl.d) - - set(include_directories "$") - set(compile_definitions "$") - set(generated_include_directories - $<$:-I\"$\">) - set(generated_compile_definitions - $<$:-D$>) - - # Obtain language standard of the file - set(device_compiler_cxx_standard) - get_target_property(targetCxxStandard ${SDK_BUILD_IR_TARGET} CXX_STANDARD) - if (targetCxxStandard MATCHES 17) - set(device_compiler_cxx_standard "-std=c++1z") - elseif (targetCxxStandard MATCHES 14) - set(device_compiler_cxx_standard "-std=c++14") - elseif (targetCxxStandard MATCHES 11) - set(device_compiler_cxx_standard "-std=c++11") - elseif (targetCxxStandard MATCHES 98) - message(FATAL_ERROR "SYCL applications cannot be compiled using C++98") - else () - set(device_compiler_cxx_standard "") - endif() - - get_property(source_compile_flags - SOURCE ${SDK_BUILD_IR_SOURCE} - PROPERTY COMPUTECPP_SOURCE_FLAGS - ) - separate_arguments(source_compile_flags) - if(source_compile_flags) - list(APPEND computecpp_source_flags ${source_compile_flags}) - endif() - - list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS - ${device_compiler_cxx_standard} - ${COMPUTECPP_USER_FLAGS} - ${computecpp_source_flags} - ) - - set(ir_dependencies ${SDK_BUILD_IR_SOURCE}) - get_target_property(target_libraries ${SDK_BUILD_IR_TARGET} LINK_LIBRARIES) - if(target_libraries) - foreach(library ${target_libraries}) - if(TARGET ${library}) - list(APPEND ir_dependencies ${library}) - endif() - endforeach() - endif() - - # Depfile support was only added in CMake 3.7 - # CMake throws an error if it is unsupported by the generator (i. e. not ninja) - if((NOT CMAKE_VERSION VERSION_LESS 3.7.0) AND - CMAKE_GENERATOR MATCHES "Ninja") - file(RELATIVE_PATH relOutputFile ${CMAKE_BINARY_DIR} ${outputDeviceFile}) - set(generate_depfile -MMD -MF ${depFileName} -MT ${relOutputFile}) - set(enable_depfile DEPFILE ${depFileName}) - endif() - - # Add custom command for running compute++ - add_custom_command( - OUTPUT ${outputDeviceFile} ${outputSyclFile} - COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - ${generated_include_directories} - ${generated_compile_definitions} - -sycl-ih ${outputSyclFile} - -o ${outputDeviceFile} - -c ${SDK_BUILD_IR_SOURCE} - ${generate_depfile} - DEPENDS ${ir_dependencies} - IMPLICIT_DEPENDS CXX ${SDK_BUILD_IR_SOURCE} - ${enable_depfile} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") - - # Name: (user-defined name)_(source file)_(counter)_ih - set(headerTargetName - ${SDK_BUILD_IR_TARGET}_${sourceFileName}_${SDK_BUILD_IR_COUNTER}_ih) - - if(NOT MSVC) - # Add a custom target for the generated integration header - add_custom_target(${headerTargetName} DEPENDS ${outputDeviceFile} ${outputSyclFile}) - add_dependencies(${SDK_BUILD_IR_TARGET} ${headerTargetName}) - endif() - - # This property can be set on a per-target basis to indicate that the - # integration header should appear after the main source listing - get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) - - if(includeAfter) - # Change the source file to the integration header - e.g. 
- # g++ -c source_file_name.cpp.sycl - get_target_property(current_sources ${SDK_BUILD_IR_TARGET} SOURCES) - # Remove absolute path to source file - list(REMOVE_ITEM current_sources ${SDK_BUILD_IR_SOURCE}) - # Remove relative path to source file - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" - rel_source_file ${SDK_BUILD_IR_SOURCE} - ) - list(REMOVE_ITEM current_sources ${rel_source_file}) - # Add SYCL header to source list - list(APPEND current_sources ${outputSyclFile}) - set_property(TARGET ${SDK_BUILD_IR_TARGET} - PROPERTY SOURCES ${current_sources}) - # CMake/gcc don't know what language a .sycl file is, so tell them - set_property(SOURCE ${outputSyclFile} PROPERTY LANGUAGE CXX) - set(includedFile ${SDK_BUILD_IR_SOURCE}) - set(cppFile ${outputSyclFile}) - else() - set_property(SOURCE ${outputSyclFile} PROPERTY HEADER_FILE_ONLY ON) - set(includedFile ${outputSyclFile}) - set(cppFile ${SDK_BUILD_IR_SOURCE}) - endif() - - # Force inclusion of the integration header for the host compiler - if(MSVC) - # Group SYCL files inside Visual Studio - source_group("SYCL" FILES ${outputSyclFile}) - - if(includeAfter) - # Allow the source file to be edited using Visual Studio. - # It will be added as a header file so it won't be compiled. - set_property(SOURCE ${SDK_BUILD_IR_SOURCE} PROPERTY HEADER_FILE_ONLY true) - endif() - - # Add both source and the sycl files to the VS solution. - target_sources(${SDK_BUILD_IR_TARGET} PUBLIC ${SDK_BUILD_IR_SOURCE} ${outputSyclFile}) - - set(forceIncludeFlags "/FI${includedFile} /TP") - else() - set(forceIncludeFlags "-include ${includedFile} -x c++") - endif() - - set_property( - SOURCE ${cppFile} - APPEND_STRING PROPERTY COMPILE_FLAGS "${forceIncludeFlags}" - ) - -endfunction(__build_ir) - -####################### -# add_sycl_to_target -####################### -# -# Adds a SYCL compilation custom command associated with an existing -# target and sets a dependancy on that new command. -# -# TARGET : Name of the target to add SYCL to. -# SOURCES : Source files to be compiled for SYCL. -# -function(add_sycl_to_target) - set(options) - set(one_value_args - TARGET - ) - set(multi_value_args - SOURCES - ) - cmake_parse_arguments(SDK_ADD_SYCL - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN} - ) - - set_target_properties(${SDK_ADD_SYCL_TARGET} PROPERTIES LINKER_LANGUAGE CXX) - - # If the CXX compiler is set to compute++ enable the driver. 
- get_filename_component(cmakeCxxCompilerFileName "${CMAKE_CXX_COMPILER}" NAME) - if("${cmakeCxxCompilerFileName}" STREQUAL "compute++") - if(MSVC) - message(FATAL_ERROR "The compiler driver is not supported by this system, - revert the CXX compiler to your default host compiler.") - endif() - - get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) - if(includeAfter) - list(APPEND COMPUTECPP_USER_FLAGS -fsycl-ih-last) - endif() - list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl-driver) - # Prepend COMPUTECPP_DEVICE_COMPILER_FLAGS and append COMPUTECPP_USER_FLAGS - foreach(prop COMPILE_OPTIONS INTERFACE_COMPILE_OPTIONS) - get_target_property(target_compile_options ${SDK_ADD_SYCL_TARGET} ${prop}) - if(NOT target_compile_options) - set(target_compile_options "") - endif() - set_property( - TARGET ${SDK_ADD_SYCL_TARGET} - PROPERTY ${prop} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - ${target_compile_options} - ${COMPUTECPP_USER_FLAGS} - ) - endforeach() - else() - set(fileCounter 0) - list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl) - # Add custom target to run compute++ and generate the integration header - foreach(sourceFile ${SDK_ADD_SYCL_SOURCES}) - if(NOT IS_ABSOLUTE ${sourceFile}) - set(sourceFile "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}") - endif() - __build_ir( - TARGET ${SDK_ADD_SYCL_TARGET} - SOURCE ${sourceFile} - COUNTER ${fileCounter} - ) - MATH(EXPR fileCounter "${fileCounter} + 1") - endforeach() - endif() - - set_property(TARGET ${SDK_ADD_SYCL_TARGET} - APPEND PROPERTY LINK_LIBRARIES ComputeCpp::ComputeCpp) - set_property(TARGET ${SDK_ADD_SYCL_TARGET} - APPEND PROPERTY INTERFACE_LINK_LIBRARIES ComputeCpp::ComputeCpp) -endfunction(add_sycl_to_target) diff --git a/external/eigen-3.4.0/cmake/FindEigen2.cmake b/external/eigen-3.4.0/cmake/FindEigen2.cmake deleted file mode 100644 index eb2709d..0000000 --- a/external/eigen-3.4.0/cmake/FindEigen2.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# - Try to find Eigen2 lib -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(Eigen2 2.0.3) -# to require version 2.0.3 to newer of Eigen2. -# -# Once done this will define -# -# EIGEN2_FOUND - system has eigen lib with correct version -# EIGEN2_INCLUDE_DIR - the eigen include directory -# EIGEN2_VERSION - eigen version - -# Copyright (c) 2006, 2007 Montel Laurent, -# Copyright (c) 2008, 2009 Gael Guennebaud, -# Redistribution and use is allowed according to the terms of the BSD license. 
- -if(NOT Eigen2_FIND_VERSION) - if(NOT Eigen2_FIND_VERSION_MAJOR) - set(Eigen2_FIND_VERSION_MAJOR 2) - endif() - if(NOT Eigen2_FIND_VERSION_MINOR) - set(Eigen2_FIND_VERSION_MINOR 0) - endif() - if(NOT Eigen2_FIND_VERSION_PATCH) - set(Eigen2_FIND_VERSION_PATCH 0) - endif() - - set(Eigen2_FIND_VERSION "${Eigen2_FIND_VERSION_MAJOR}.${Eigen2_FIND_VERSION_MINOR}.${Eigen2_FIND_VERSION_PATCH}") -endif() - -macro(_eigen2_check_version) - file(READ "${EIGEN2_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen2_version_header) - - string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen2_world_version_match "${_eigen2_version_header}") - set(EIGEN2_WORLD_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen2_major_version_match "${_eigen2_version_header}") - set(EIGEN2_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen2_minor_version_match "${_eigen2_version_header}") - set(EIGEN2_MINOR_VERSION "${CMAKE_MATCH_1}") - - set(EIGEN2_VERSION ${EIGEN2_WORLD_VERSION}.${EIGEN2_MAJOR_VERSION}.${EIGEN2_MINOR_VERSION}) - if((${EIGEN2_WORLD_VERSION} NOTEQUAL 2) OR (${EIGEN2_MAJOR_VERSION} GREATER 10) OR (${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION})) - set(EIGEN2_VERSION_OK FALSE) - else() - set(EIGEN2_VERSION_OK TRUE) - endif() - - if(NOT EIGEN2_VERSION_OK) - - message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, " - "but at least version ${Eigen2_FIND_VERSION} is required") - endif() -endmacro() - -if (EIGEN2_INCLUDE_DIR) - - # in cache already - _eigen2_check_version() - set(EIGEN2_FOUND ${EIGEN2_VERSION_OK}) - -else () - -find_path(EIGEN2_INCLUDE_DIR NAMES Eigen/Core - PATHS - ${INCLUDE_INSTALL_DIR} - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen2 - ) - -if(EIGEN2_INCLUDE_DIR) - _eigen2_check_version() -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Eigen2 DEFAULT_MSG EIGEN2_INCLUDE_DIR EIGEN2_VERSION_OK) - -mark_as_advanced(EIGEN2_INCLUDE_DIR) - -endif() - diff --git a/external/eigen-3.4.0/cmake/FindEigen3.cmake b/external/eigen-3.4.0/cmake/FindEigen3.cmake deleted file mode 100644 index 0b36805..0000000 --- a/external/eigen-3.4.0/cmake/FindEigen3.cmake +++ /dev/null @@ -1,107 +0,0 @@ -# - Try to find Eigen3 lib -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(Eigen3 3.1.2) -# to require version 3.1.2 or newer of Eigen3. -# -# Once done this will define -# -# EIGEN3_FOUND - system has eigen lib with correct version -# EIGEN3_INCLUDE_DIR - the eigen include directory -# EIGEN3_VERSION - eigen version -# -# and the following imported target: -# -# Eigen3::Eigen - The header-only Eigen library -# -# This module reads hints about search locations from -# the following environment variables: -# -# EIGEN3_ROOT -# EIGEN3_ROOT_DIR - -# Copyright (c) 2006, 2007 Montel Laurent, -# Copyright (c) 2008, 2009 Gael Guennebaud, -# Copyright (c) 2009 Benoit Jacob -# Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
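# A minimal consumer sketch of the interface documented above; the executable name
# "modcma" is an assumption, and once this bundled module is removed a system-wide
# Eigen3Config.cmake (or an equivalent find module) must be discoverable:
find_package(Eigen3 3.4 REQUIRED)
add_executable(modcma src/main.cpp)
target_link_libraries(modcma PRIVATE Eigen3::Eigen)  # header-only imported target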
- -if(NOT Eigen3_FIND_VERSION) - if(NOT Eigen3_FIND_VERSION_MAJOR) - set(Eigen3_FIND_VERSION_MAJOR 2) - endif() - if(NOT Eigen3_FIND_VERSION_MINOR) - set(Eigen3_FIND_VERSION_MINOR 91) - endif() - if(NOT Eigen3_FIND_VERSION_PATCH) - set(Eigen3_FIND_VERSION_PATCH 0) - endif() - - set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") -endif() - -macro(_eigen3_check_version) - file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) - - string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") - set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") - set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") - set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") - - set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) - if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) - set(EIGEN3_VERSION_OK FALSE) - else() - set(EIGEN3_VERSION_OK TRUE) - endif() - - if(NOT EIGEN3_VERSION_OK) - - message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " - "but at least version ${Eigen3_FIND_VERSION} is required") - endif() -endmacro() - -if (EIGEN3_INCLUDE_DIR) - - # in cache already - _eigen3_check_version() - set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) - set(Eigen3_FOUND ${EIGEN3_VERSION_OK}) - -else () - - # search first if an Eigen3Config.cmake is available in the system, - # if successful this would set EIGEN3_INCLUDE_DIR and the rest of - # the script will work as usual - find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) - - if(NOT EIGEN3_INCLUDE_DIR) - find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library - HINTS - ENV EIGEN3_ROOT - ENV EIGEN3_ROOT_DIR - PATHS - ${CMAKE_INSTALL_PREFIX}/include - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen3 eigen - ) - endif() - - if(EIGEN3_INCLUDE_DIR) - _eigen3_check_version() - endif() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) - - mark_as_advanced(EIGEN3_INCLUDE_DIR) - -endif() - -if(EIGEN3_FOUND AND NOT TARGET Eigen3::Eigen) - add_library(Eigen3::Eigen INTERFACE IMPORTED) - set_target_properties(Eigen3::Eigen PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${EIGEN3_INCLUDE_DIR}") -endif() diff --git a/external/eigen-3.4.0/cmake/FindFFTW.cmake b/external/eigen-3.4.0/cmake/FindFFTW.cmake deleted file mode 100644 index ed55c5f..0000000 --- a/external/eigen-3.4.0/cmake/FindFFTW.cmake +++ /dev/null @@ -1,120 +0,0 @@ -# - Find the FFTW library -# -# Usage: -# find_package(FFTW [REQUIRED] [QUIET] ) -# -# It sets the following variables: -# FFTW_FOUND ... true if fftw is found on the system -# FFTW_LIBRARIES ... full path to fftw library -# FFTW_INCLUDES ... fftw include directory -# -# The following variables will be checked by the function -# FFTW_USE_STATIC_LIBS ... if true, only static libraries are found -# FFTW_ROOT ... if set, the libraries are exclusively searched -# under this path -# FFTW_LIBRARY ... fftw library to use -# FFTW_INCLUDE_DIR ... 
fftw include directory -# - -#If environment variable FFTWDIR is specified, it has same effect as FFTW_ROOT -if( NOT FFTW_ROOT AND ENV{FFTWDIR} ) - set( FFTW_ROOT $ENV{FFTWDIR} ) -endif() - -# Check if we can use PkgConfig -include(CMakeFindDependencyMacro) -find_dependency(PkgConfig) - -#Determine from PKG -if( PKG_CONFIG_FOUND AND NOT FFTW_ROOT ) - pkg_check_modules( PKG_FFTW QUIET "fftw3" ) -endif() - -#Check whether to search static or dynamic libs -set( CMAKE_FIND_LIBRARY_SUFFIXES_SAV ${CMAKE_FIND_LIBRARY_SUFFIXES} ) - -if( ${FFTW_USE_STATIC_LIBS} ) - set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ) -else() - set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX} ) -endif() - -if( FFTW_ROOT ) - - #find libs - find_library( - FFTW_LIB - NAMES "fftw3" - PATHS ${FFTW_ROOT} - PATH_SUFFIXES "lib" "lib64" - NO_DEFAULT_PATH - ) - - find_library( - FFTWF_LIB - NAMES "fftw3f" - PATHS ${FFTW_ROOT} - PATH_SUFFIXES "lib" "lib64" - NO_DEFAULT_PATH - ) - - find_library( - FFTWL_LIB - NAMES "fftw3l" - PATHS ${FFTW_ROOT} - PATH_SUFFIXES "lib" "lib64" - NO_DEFAULT_PATH - ) - - #find includes - find_path( - FFTW_INCLUDES - NAMES "fftw3.h" - PATHS ${FFTW_ROOT} - PATH_SUFFIXES "include" - NO_DEFAULT_PATH - ) - -else() - - find_library( - FFTW_LIB - NAMES "fftw3" - PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} - ) - - find_library( - FFTWF_LIB - NAMES "fftw3f" - PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} - ) - - - find_library( - FFTWL_LIB - NAMES "fftw3l" - PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} - ) - - find_path( - FFTW_INCLUDES - NAMES "fftw3.h" - PATHS ${PKG_FFTW_INCLUDE_DIRS} ${INCLUDE_INSTALL_DIR} - ) - -endif() - -set(FFTW_LIBRARIES ${FFTW_LIB} ${FFTWF_LIB}) - -if(FFTWL_LIB) - set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTWL_LIB}) -endif() - -set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAV} ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(FFTW DEFAULT_MSG - FFTW_INCLUDES FFTW_LIBRARIES) - -mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES FFTW_LIB FFTWF_LIB FFTWL_LIB) - diff --git a/external/eigen-3.4.0/cmake/FindGLEW.cmake b/external/eigen-3.4.0/cmake/FindGLEW.cmake deleted file mode 100644 index 9d486d5..0000000 --- a/external/eigen-3.4.0/cmake/FindGLEW.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2009 Boudewijn Rempt -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. -# -# - try to find glew library and include files -# GLEW_INCLUDE_DIR, where to find GL/glew.h, etc. -# GLEW_LIBRARIES, the libraries to link against -# GLEW_FOUND, If false, do not try to use GLEW. -# Also defined, but not for general use are: -# GLEW_GLEW_LIBRARY = the full path to the glew library. - -if (WIN32) - - if(CYGWIN) - - find_path( GLEW_INCLUDE_DIR GL/glew.h) - - find_library( GLEW_GLEW_LIBRARY glew32 - ${OPENGL_LIBRARY_DIR} - /usr/lib/w32api - /usr/X11R6/lib - ) - - - else(CYGWIN) - - find_path( GLEW_INCLUDE_DIR GL/glew.h - $ENV{GLEW_ROOT_PATH}/include - ) - - find_library( GLEW_GLEW_LIBRARY - NAMES glew glew32 - PATHS - $ENV{GLEW_ROOT_PATH}/lib - ${OPENGL_LIBRARY_DIR} - ) - - endif(CYGWIN) - -else (WIN32) - - if (APPLE) -# These values for Apple could probably do with improvement. 
- find_path( GLEW_INCLUDE_DIR glew.h - /System/Library/Frameworks/GLEW.framework/Versions/A/Headers - ${OPENGL_LIBRARY_DIR} - ) - set(GLEW_GLEW_LIBRARY "-framework GLEW" CACHE STRING "GLEW library for OSX") - set(GLEW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") - else (APPLE) - - find_path( GLEW_INCLUDE_DIR GL/glew.h - /usr/include/GL - /usr/openwin/share/include - /usr/openwin/include - /usr/X11R6/include - /usr/include/X11 - /opt/graphics/OpenGL/include - /opt/graphics/OpenGL/contrib/libglew - ) - - find_library( GLEW_GLEW_LIBRARY GLEW - /usr/openwin/lib - /usr/X11R6/lib - ) - - endif (APPLE) - -endif (WIN32) - -set( GLEW_FOUND "NO" ) -if(GLEW_INCLUDE_DIR) - if(GLEW_GLEW_LIBRARY) - # Is -lXi and -lXmu required on all platforms that have it? - # If not, we need some way to figure out what platform we are on. - set( GLEW_LIBRARIES - ${GLEW_GLEW_LIBRARY} - ${GLEW_cocoa_LIBRARY} - ) - set( GLEW_FOUND "YES" ) - -#The following deprecated settings are for backwards compatibility with CMake1.4 - set (GLEW_LIBRARY ${GLEW_LIBRARIES}) - set (GLEW_INCLUDE_PATH ${GLEW_INCLUDE_DIR}) - - endif(GLEW_GLEW_LIBRARY) -endif(GLEW_INCLUDE_DIR) - -if(GLEW_FOUND) - if(NOT GLEW_FIND_QUIETLY) - message(STATUS "Found Glew: ${GLEW_LIBRARIES}") - endif(NOT GLEW_FIND_QUIETLY) -else(GLEW_FOUND) - if(GLEW_FIND_REQUIRED) - message(FATAL_ERROR "Could not find Glew") - endif(GLEW_FIND_REQUIRED) -endif(GLEW_FOUND) - -mark_as_advanced( - GLEW_INCLUDE_DIR - GLEW_GLEW_LIBRARY - GLEW_Xmu_LIBRARY - GLEW_Xi_LIBRARY -) diff --git a/external/eigen-3.4.0/cmake/FindGMP.cmake b/external/eigen-3.4.0/cmake/FindGMP.cmake deleted file mode 100644 index c41eedc..0000000 --- a/external/eigen-3.4.0/cmake/FindGMP.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# Try to find the GNU Multiple Precision Arithmetic Library (GMP) -# See http://gmplib.org/ - -if (GMP_INCLUDES AND GMP_LIBRARIES) - set(GMP_FIND_QUIETLY TRUE) -endif () - -find_path(GMP_INCLUDES - NAMES - gmp.h - PATHS - $ENV{GMPDIR} - ${INCLUDE_INSTALL_DIR} -) - -find_library(GMP_LIBRARIES gmp PATHS $ENV{GMPDIR} ${LIB_INSTALL_DIR}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GMP DEFAULT_MSG - GMP_INCLUDES GMP_LIBRARIES) -mark_as_advanced(GMP_INCLUDES GMP_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindGSL.cmake b/external/eigen-3.4.0/cmake/FindGSL.cmake deleted file mode 100644 index 8632232..0000000 --- a/external/eigen-3.4.0/cmake/FindGSL.cmake +++ /dev/null @@ -1,170 +0,0 @@ -# Try to find gnu scientific library GSL -# See -# http://www.gnu.org/software/gsl/ and -# http://gnuwin32.sourceforge.net/packages/gsl.htm -# -# Once run this will define: -# -# GSL_FOUND = system has GSL lib -# -# GSL_LIBRARIES = full path to the libraries -# on Unix/Linux with additional linker flags from "gsl-config --libs" -# -# CMAKE_GSL_CXX_FLAGS = Unix compiler flags for GSL, essentially "`gsl-config --cxxflags`" -# -# GSL_INCLUDE_DIR = where to find headers -# -# GSL_LINK_DIRECTORIES = link directories, useful for rpath on Unix -# GSL_EXE_LINKER_FLAGS = rpath on Unix -# -# Felix Woelk 07/2004 -# Jan Woetzel -# -# www.mip.informatik.uni-kiel.de -# -------------------------------- - -if(WIN32) - # JW tested with gsl-1.8, Windows XP, MSVS 7.1 - set(GSL_POSSIBLE_ROOT_DIRS - ${GSL_ROOT_DIR} - $ENV{GSL_ROOT_DIR} - ${GSL_DIR} - ${GSL_HOME} - $ENV{GSL_DIR} - $ENV{GSL_HOME} - $ENV{EXTRA} - "C:/Program Files/GnuWin32" - ) - find_path(GSL_INCLUDE_DIR - NAMES gsl/gsl_cdf.h gsl/gsl_randist.h - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES include - DOC 
"GSL header include dir" - ) - - find_library(GSL_GSL_LIBRARY - NAMES libgsl.dll.a gsl libgsl - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library" ) - - if(NOT GSL_GSL_LIBRARY) - find_file(GSL_GSL_LIBRARY - NAMES libgsl.dll.a - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library") - endif() - - find_library(GSL_GSLCBLAS_LIBRARY - NAMES libgslcblas.dll.a gslcblas libgslcblas - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL cblas library dir" ) - - if(NOT GSL_GSLCBLAS_LIBRARY) - find_file(GSL_GSLCBLAS_LIBRARY - NAMES libgslcblas.dll.a - PATHS ${GSL_POSSIBLE_ROOT_DIRS} - PATH_SUFFIXES lib - DOC "GSL library") - endif() - - set(GSL_LIBRARIES ${GSL_GSL_LIBRARY}) - - #message("DBG\n" - # "GSL_GSL_LIBRARY=${GSL_GSL_LIBRARY}\n" - # "GSL_GSLCBLAS_LIBRARY=${GSL_GSLCBLAS_LIBRARY}\n" - # "GSL_LIBRARIES=${GSL_LIBRARIES}") - - -else(WIN32) - - if(UNIX) - set(GSL_CONFIG_PREFER_PATH - "$ENV{GSL_DIR}/bin" - "$ENV{GSL_DIR}" - "$ENV{GSL_HOME}/bin" - "$ENV{GSL_HOME}" - CACHE STRING "preferred path to GSL (gsl-config)") - find_program(GSL_CONFIG gsl-config - ${GSL_CONFIG_PREFER_PATH} - /usr/bin/ - ) - # message("DBG GSL_CONFIG ${GSL_CONFIG}") - - if (GSL_CONFIG) - # set CXXFLAGS to be fed into CXX_FLAGS by the user: - set(GSL_CXX_FLAGS "`${GSL_CONFIG} --cflags`") - - # set INCLUDE_DIRS to prefix+include - exec_program(${GSL_CONFIG} - ARGS --prefix - OUTPUT_VARIABLE GSL_PREFIX) - set(GSL_INCLUDE_DIR ${GSL_PREFIX}/include CACHE STRING INTERNAL) - - # set link libraries and link flags - #set(GSL_LIBRARIES "`${GSL_CONFIG} --libs`") - exec_program(${GSL_CONFIG} - ARGS --libs - OUTPUT_VARIABLE GSL_LIBRARIES ) - - # extract link dirs for rpath - exec_program(${GSL_CONFIG} - ARGS --libs - OUTPUT_VARIABLE GSL_CONFIG_LIBS ) - - # extract version - exec_program(${GSL_CONFIG} - ARGS --version - OUTPUT_VARIABLE GSL_FULL_VERSION ) - - # split version as major/minor - string(REGEX MATCH "(.)\\..*" GSL_VERSION_MAJOR_ "${GSL_FULL_VERSION}") - set(GSL_VERSION_MAJOR ${CMAKE_MATCH_1}) - string(REGEX MATCH ".\\.(.*)" GSL_VERSION_MINOR_ "${GSL_FULL_VERSION}") - set(GSL_VERSION_MINOR ${CMAKE_MATCH_1}) - - # split off the link dirs (for rpath) - # use regular expression to match wildcard equivalent "-L*" - # with is a space or a semicolon - string(REGEX MATCHALL "[-][L]([^ ;])+" - GSL_LINK_DIRECTORIES_WITH_PREFIX - "${GSL_CONFIG_LIBS}" ) - # message("DBG GSL_LINK_DIRECTORIES_WITH_PREFIX=${GSL_LINK_DIRECTORIES_WITH_PREFIX}") - - # remove prefix -L because we need the pure directory for LINK_DIRECTORIES - - if (GSL_LINK_DIRECTORIES_WITH_PREFIX) - string(REGEX REPLACE "[-][L]" "" GSL_LINK_DIRECTORIES ${GSL_LINK_DIRECTORIES_WITH_PREFIX} ) - endif (GSL_LINK_DIRECTORIES_WITH_PREFIX) - set(GSL_EXE_LINKER_FLAGS "-Wl,-rpath,${GSL_LINK_DIRECTORIES}" CACHE STRING INTERNAL) - # message("DBG GSL_LINK_DIRECTORIES=${GSL_LINK_DIRECTORIES}") - # message("DBG GSL_EXE_LINKER_FLAGS=${GSL_EXE_LINKER_FLAGS}") - - # add_definitions("-DHAVE_GSL") - # set(GSL_DEFINITIONS "-DHAVE_GSL") - mark_as_advanced( - GSL_CXX_FLAGS - GSL_INCLUDE_DIR - GSL_LIBRARIES - GSL_LINK_DIRECTORIES - GSL_DEFINITIONS - ) - message(STATUS "Using GSL from ${GSL_PREFIX}") - - else(GSL_CONFIG) - message("FindGSL.cmake: gsl-config not found. Please set it manually. 
GSL_CONFIG=${GSL_CONFIG}") - endif(GSL_CONFIG) - - endif(UNIX) -endif(WIN32) - - -if(GSL_LIBRARIES) - if(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) - - set(GSL_FOUND 1) - - endif(GSL_INCLUDE_DIR OR GSL_CXX_FLAGS) -endif(GSL_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindGoogleHash.cmake b/external/eigen-3.4.0/cmake/FindGoogleHash.cmake deleted file mode 100644 index 481eb4d..0000000 --- a/external/eigen-3.4.0/cmake/FindGoogleHash.cmake +++ /dev/null @@ -1,23 +0,0 @@ - -if (GOOGLEHASH_INCLUDES AND GOOGLEHASH_LIBRARIES) - set(GOOGLEHASH_FIND_QUIETLY TRUE) -endif () - -find_path(GOOGLEHASH_INCLUDES - NAMES - google/dense_hash_map - PATHS - ${INCLUDE_INSTALL_DIR} -) - -if(GOOGLEHASH_INCLUDES) - # let's make sure it compiles with the current compiler - file(WRITE ${CMAKE_BINARY_DIR}/googlehash_test.cpp - "#include \n#include \nint main(int argc, char** argv) { google::dense_hash_map a; google::sparse_hash_map b; return 0;}\n") - try_compile(GOOGLEHASH_COMPILE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/googlehash_test.cpp OUTPUT_VARIABLE GOOGLEHASH_COMPILE_RESULT) -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GoogleHash DEFAULT_MSG GOOGLEHASH_INCLUDES GOOGLEHASH_COMPILE) - -mark_as_advanced(GOOGLEHASH_INCLUDES) diff --git a/external/eigen-3.4.0/cmake/FindHWLOC.cmake b/external/eigen-3.4.0/cmake/FindHWLOC.cmake deleted file mode 100644 index 522f521..0000000 --- a/external/eigen-3.4.0/cmake/FindHWLOC.cmake +++ /dev/null @@ -1,332 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find HWLOC include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(HWLOC -# [REQUIRED]) # Fail with error if hwloc is not found -# -# This module finds headers and hwloc library. -# Results are reported in variables: -# HWLOC_FOUND - True if headers and requested libraries were found -# HWLOC_INCLUDE_DIRS - hwloc include directories -# HWLOC_LIBRARY_DIRS - Link directories for hwloc libraries -# HWLOC_LIBRARIES - hwloc component libraries to be linked -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DHWLOC_DIR=path/to/hwloc): -# HWLOC_DIR - Where to find the base directory of hwloc -# HWLOC_INCDIR - Where to find the header files -# HWLOC_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: HWLOC_DIR, HWLOC_INCDIR, HWLOC_LIBDIR - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) 
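# A minimal consumer sketch of the hint and result variables documented above; the
# "bench" target and the HWLOC_DIR path are illustrative assumptions only:
#   cmake path/to/project -DHWLOC_DIR=/opt/hwloc
find_package(HWLOC REQUIRED)
target_include_directories(bench PRIVATE ${HWLOC_INCLUDE_DIRS})
target_link_directories(bench PRIVATE ${HWLOC_LIBRARY_DIRS})
target_link_libraries(bench PRIVATE ${HWLOC_LIBRARIES})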
- -include(CheckStructHasMember) -include(CheckCSourceCompiles) - -if (NOT HWLOC_FOUND) - set(HWLOC_DIR "" CACHE PATH "Installation directory of HWLOC library") - if (NOT HWLOC_FIND_QUIETLY) - message(STATUS "A cache variable, namely HWLOC_DIR, has been set to specify the install directory of HWLOC") - endif() -endif() - -set(ENV_HWLOC_DIR "$ENV{HWLOC_DIR}") -set(ENV_HWLOC_INCDIR "$ENV{HWLOC_INCDIR}") -set(ENV_HWLOC_LIBDIR "$ENV{HWLOC_LIBDIR}") -set(HWLOC_GIVEN_BY_USER "FALSE") -if ( HWLOC_DIR OR ( HWLOC_INCDIR AND HWLOC_LIBDIR) OR ENV_HWLOC_DIR OR (ENV_HWLOC_INCDIR AND ENV_HWLOC_LIBDIR) ) - set(HWLOC_GIVEN_BY_USER "TRUE") -endif() - -# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) -# ------------------------------------------------------------------------------------- -include(CMakeFindDependencyMacro) -# include(FindPkgConfig) -find_dependency(PkgConfig QUIET) -if( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) - - pkg_search_module(HWLOC hwloc) - if (NOT HWLOC_FIND_QUIETLY) - if (HWLOC_FOUND AND HWLOC_LIBRARIES) - message(STATUS "Looking for HWLOC - found using PkgConfig") - #if(NOT HWLOC_INCLUDE_DIRS) - # message("${Magenta}HWLOC_INCLUDE_DIRS is empty using PkgConfig." - # "Perhaps the path to hwloc headers is already present in your" - # "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}") - #endif() - else() - message(STATUS "${Magenta}Looking for HWLOC - not found using PkgConfig." - "\n Perhaps you should add the directory containing hwloc.pc to" - "\n the PKG_CONFIG_PATH environment variable.${ColourReset}") - endif() - endif() - -endif() - -if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) - - if (NOT HWLOC_FIND_QUIETLY) - message(STATUS "Looking for HWLOC - PkgConfig not used") - endif() - - # Looking for include - # ------------------- - - # Add system include paths to search include - # ------------------------------------------ - unset(_inc_env) - if(ENV_HWLOC_INCDIR) - list(APPEND _inc_env "${ENV_HWLOC_INCDIR}") - elseif(ENV_HWLOC_DIR) - list(APPEND _inc_env "${ENV_HWLOC_DIR}") - list(APPEND _inc_env "${ENV_HWLOC_DIR}/include") - list(APPEND _inc_env "${ENV_HWLOC_DIR}/include/hwloc") - else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() - endif() - list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") - list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") - list(REMOVE_DUPLICATES _inc_env) - - # set paths where to look for - set(PATH_TO_LOOK_FOR "${_inc_env}") - - # Try to find the hwloc header in the given paths - # ------------------------------------------------- - # call cmake macro to find the header path - if(HWLOC_INCDIR) - set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") - find_path(HWLOC_hwloc.h_DIRS - NAMES hwloc.h - HINTS ${HWLOC_INCDIR}) - else() - if(HWLOC_DIR) - set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") - find_path(HWLOC_hwloc.h_DIRS - NAMES hwloc.h - HINTS ${HWLOC_DIR} - PATH_SUFFIXES "include" "include/hwloc") - else() - set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") - find_path(HWLOC_hwloc.h_DIRS - 
NAMES hwloc.h - HINTS ${PATH_TO_LOOK_FOR} - PATH_SUFFIXES "hwloc") - endif() - endif() - mark_as_advanced(HWLOC_hwloc.h_DIRS) - - # Add path to cmake variable - # ------------------------------------ - if (HWLOC_hwloc.h_DIRS) - set(HWLOC_INCLUDE_DIRS "${HWLOC_hwloc.h_DIRS}") - else () - set(HWLOC_INCLUDE_DIRS "HWLOC_INCLUDE_DIRS-NOTFOUND") - if(NOT HWLOC_FIND_QUIETLY) - message(STATUS "Looking for hwloc -- hwloc.h not found") - endif() - endif () - - if (HWLOC_INCLUDE_DIRS) - list(REMOVE_DUPLICATES HWLOC_INCLUDE_DIRS) - endif () - - - # Looking for lib - # --------------- - - # Add system library paths to search lib - # -------------------------------------- - unset(_lib_env) - if(ENV_HWLOC_LIBDIR) - list(APPEND _lib_env "${ENV_HWLOC_LIBDIR}") - elseif(ENV_HWLOC_DIR) - list(APPEND _lib_env "${ENV_HWLOC_DIR}") - list(APPEND _lib_env "${ENV_HWLOC_DIR}/lib") - else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() - endif() - list(REMOVE_DUPLICATES _lib_env) - - # set paths where to look for - set(PATH_TO_LOOK_FOR "${_lib_env}") - - # Try to find the hwloc lib in the given paths - # ---------------------------------------------- - - # call cmake macro to find the lib path - if(HWLOC_LIBDIR) - set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") - find_library(HWLOC_hwloc_LIBRARY - NAMES hwloc - HINTS ${HWLOC_LIBDIR}) - else() - if(HWLOC_DIR) - set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") - find_library(HWLOC_hwloc_LIBRARY - NAMES hwloc - HINTS ${HWLOC_DIR} - PATH_SUFFIXES lib lib32 lib64) - else() - set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") - find_library(HWLOC_hwloc_LIBRARY - NAMES hwloc - HINTS ${PATH_TO_LOOK_FOR}) - endif() - endif() - mark_as_advanced(HWLOC_hwloc_LIBRARY) - - # If found, add path to cmake variable - # ------------------------------------ - if (HWLOC_hwloc_LIBRARY) - get_filename_component(hwloc_lib_path ${HWLOC_hwloc_LIBRARY} PATH) - # set cmake variables (respects naming convention) - set(HWLOC_LIBRARIES "${HWLOC_hwloc_LIBRARY}") - set(HWLOC_LIBRARY_DIRS "${hwloc_lib_path}") - else () - set(HWLOC_LIBRARIES "HWLOC_LIBRARIES-NOTFOUND") - set(HWLOC_LIBRARY_DIRS "HWLOC_LIBRARY_DIRS-NOTFOUND") - if(NOT HWLOC_FIND_QUIETLY) - message(STATUS "Looking for hwloc -- lib hwloc not found") - endif() - endif () - - if (HWLOC_LIBRARY_DIRS) - list(REMOVE_DUPLICATES HWLOC_LIBRARY_DIRS) - endif () - - # check a function to validate the find - if(HWLOC_LIBRARIES) - - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - set(REQUIRED_LIBS) - - # HWLOC - if (HWLOC_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") - endif() - if (HWLOC_LIBRARY_DIRS) - set(REQUIRED_LIBDIRS "${HWLOC_LIBRARY_DIRS}") - endif() - set(REQUIRED_LIBS "${HWLOC_LIBRARIES}") - - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(HWLOC_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(hwloc_topology_init HWLOC_WORKS) - 
mark_as_advanced(HWLOC_WORKS)
-
-    if(NOT HWLOC_WORKS)
-      if(NOT HWLOC_FIND_QUIETLY)
-        message(STATUS "Looking for hwloc : test of hwloc_topology_init with hwloc library fails")
-        message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}")
-        message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
-        message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
-      endif()
-    endif()
-    set(CMAKE_REQUIRED_INCLUDES)
-    set(CMAKE_REQUIRED_FLAGS)
-    set(CMAKE_REQUIRED_LIBRARIES)
-  endif()
-
-endif()
-
-if (HWLOC_LIBRARIES)
-  if (HWLOC_LIBRARY_DIRS)
-    list(GET HWLOC_LIBRARY_DIRS 0 first_lib_path)
-  else()
-    list(GET HWLOC_LIBRARIES 0 first_lib)
-    get_filename_component(first_lib_path "${first_lib}" PATH)
-  endif()
-  if (${first_lib_path} MATCHES "/lib(32|64)?$")
-    string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}")
-    set(HWLOC_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HWLOC library" FORCE)
-  else()
-    set(HWLOC_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HWLOC library" FORCE)
-  endif()
-endif()
-mark_as_advanced(HWLOC_DIR)
-mark_as_advanced(HWLOC_DIR_FOUND)
-
-# check that HWLOC has been found
-# -------------------------------
-include(FindPackageHandleStandardArgs)
-if (PKG_CONFIG_EXECUTABLE AND HWLOC_FOUND)
-  find_package_handle_standard_args(HWLOC DEFAULT_MSG
-    HWLOC_LIBRARIES)
-else()
-  find_package_handle_standard_args(HWLOC DEFAULT_MSG
-    HWLOC_LIBRARIES
-    HWLOC_WORKS)
-endif()
-
-if (HWLOC_FOUND)
-  set(HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
-  list(APPEND CMAKE_REQUIRED_INCLUDES ${HWLOC_INCLUDE_DIRS})
-
-  # test headers to guess the version
-  check_struct_has_member( "struct hwloc_obj" parent hwloc.h HAVE_HWLOC_PARENT_MEMBER )
-  check_struct_has_member( "struct hwloc_cache_attr_s" size hwloc.h HAVE_HWLOC_CACHE_ATTR )
-  check_c_source_compiles( "#include <hwloc.h>
-    int main(void) { hwloc_obj_t o; o->type = HWLOC_OBJ_PU; return 0;}" HAVE_HWLOC_OBJ_PU)
-  include(CheckLibraryExists)
-  check_library_exists(${HWLOC_LIBRARIES} hwloc_bitmap_free "" HAVE_HWLOC_BITMAP)
-
-  set(CMAKE_REQUIRED_INCLUDES ${HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES})
-endif()
diff --git a/external/eigen-3.4.0/cmake/FindKLU.cmake b/external/eigen-3.4.0/cmake/FindKLU.cmake
deleted file mode 100644
index 6217d14..0000000
--- a/external/eigen-3.4.0/cmake/FindKLU.cmake
+++ /dev/null
@@ -1,48 +0,0 @@
-# KLU lib usually requires linking to a blas library.
-# It is up to the user of this module to find a BLAS and link to it.
- -if (KLU_INCLUDES AND KLU_LIBRARIES) - set(KLU_FIND_QUIETLY TRUE) -endif () - -find_path(KLU_INCLUDES - NAMES - klu.h - PATHS - $ENV{KLUDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - suitesparse - ufsparse -) - -find_library(KLU_LIBRARIES klu PATHS $ENV{KLUDIR} ${LIB_INSTALL_DIR}) - -if(KLU_LIBRARIES) - - if(NOT KLU_LIBDIR) - get_filename_component(KLU_LIBDIR ${KLU_LIBRARIES} PATH) - endif() - - find_library(COLAMD_LIBRARY colamd PATHS ${KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) - if(COLAMD_LIBRARY) - set(KLU_LIBRARIES ${KLU_LIBRARIES} ${COLAMD_LIBRARY}) - endif () - - find_library(AMD_LIBRARY amd PATHS ${KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) - if(AMD_LIBRARY) - set(KLU_LIBRARIES ${KLU_LIBRARIES} ${AMD_LIBRARY}) - endif () - - find_library(BTF_LIBRARY btf PATHS $ENV{KLU_LIBDIR} $ENV{KLUDIR} ${LIB_INSTALL_DIR}) - if(BTF_LIBRARY) - set(KLU_LIBRARIES ${KLU_LIBRARIES} ${BTF_LIBRARY}) - endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(KLU DEFAULT_MSG - KLU_INCLUDES KLU_LIBRARIES) - -mark_as_advanced(KLU_INCLUDES KLU_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY BTF_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/FindLAPACK.cmake b/external/eigen-3.4.0/cmake/FindLAPACK.cmake deleted file mode 100644 index 3fd7388..0000000 --- a/external/eigen-3.4.0/cmake/FindLAPACK.cmake +++ /dev/null @@ -1,274 +0,0 @@ -# Find LAPACK library -# -# This module finds an installed library that implements the LAPACK -# linear-algebra interface (see http://www.netlib.org/lapack/). -# The approach follows mostly that taken for the autoconf macro file, acx_lapack.m4 -# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). -# -# This module sets the following variables: -# LAPACK_FOUND - set to true if a library implementing the LAPACK interface -# is found -# LAPACK_INCLUDE_DIR - Directories containing the LAPACK header files -# LAPACK_DEFINITIONS - Compilation options to use LAPACK -# LAPACK_LINKER_FLAGS - Linker flags to use LAPACK (excluding -l -# and -L). -# LAPACK_LIBRARIES_DIR - Directories containing the LAPACK libraries. -# May be null if LAPACK_LIBRARIES contains libraries name using full path. -# LAPACK_LIBRARIES - List of libraries to link against LAPACK interface. -# May be null if the compiler supports auto-link (e.g. VC++). -# LAPACK_USE_FILE - The name of the cmake module to include to compile -# applications or libraries using LAPACK. -# -# This module was modified by CGAL team: -# - find libraries for a C++ compiler, instead of Fortran -# - added LAPACK_INCLUDE_DIR, LAPACK_DEFINITIONS and LAPACK_LIBRARIES_DIR -# - removed LAPACK95_LIBRARIES - - -include(CheckFunctionExists) -include(CMakeFindDependencyMacro) - -# This macro checks for the existence of the combination of fortran libraries -# given by _list. If the combination is found, this macro checks (using the -# check_function_exists macro) whether can link against that library -# combination using the name of a routine given by _name using the linker -# flags given by _flags. If the combination of libraries is found and passes -# the link test, LIBRARIES is set to the list of complete library paths that -# have been found and DEFINITIONS to the required definitions. -# Otherwise, LIBRARIES is set to FALSE. -# N.B. _prefix is the prefix applied to the names of all cached variables that -# are generated internally and marked advanced by this macro. 
-macro(check_lapack_libraries DEFINITIONS LIBRARIES _prefix _name _flags _list _blas _path) - #message("DEBUG: check_lapack_libraries(${_list} in ${_path} with ${_blas})") - - # Check for the existence of the libraries given by _list - set(_libraries_found TRUE) - set(_libraries_work FALSE) - set(${DEFINITIONS} "") - set(${LIBRARIES} "") - set(_combined_name) - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - - if(_libraries_found) - # search first in ${_path} - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ${_path} NO_DEFAULT_PATH - ) - # if not found, search in environment variables and system - if ( WIN32 ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ENV LIB - ) - elseif ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH - ) - else () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH - ) - endif() - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_found ${${_prefix}_${_library}_LIBRARY}) - endif() - endforeach() - if(_libraries_found) - set(_libraries_found ${${LIBRARIES}}) - endif() - - # Test this combination of libraries with the Fortran/f2c interface. - # We test the Fortran interface first as it is well standardized. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "-D${_prefix}_USE_F2C") - set(${LIBRARIES} ${_libraries_found}) - # Some C++ linkers require the f2c library to link with Fortran libraries. - # I do not know which ones, thus I just add the f2c library if it is available. - find_dependency( F2C QUIET ) - if ( F2C_FOUND ) - set(${DEFINITIONS} ${${DEFINITIONS}} ${F2C_DEFINITIONS}) - set(${LIBRARIES} ${${LIBRARIES}} ${F2C_LIBRARIES}) - endif() - set(CMAKE_REQUIRED_DEFINITIONS ${${DEFINITIONS}}) - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) - #message("DEBUG: CMAKE_REQUIRED_DEFINITIONS = ${CMAKE_REQUIRED_DEFINITIONS}") - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - # Check if function exists with f2c calling convention (ie a trailing underscore) - check_function_exists(${_name}_ ${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(CMAKE_REQUIRED_DEFINITIONS} "") - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(_libraries_work ${${_prefix}_${_name}_${_combined_name}_f2c_WORKS}) - endif() - - # If not found, test this combination of libraries with a C interface. - # A few implementations (ie ACML) provide a C interface. Unfortunately, there is no standard. 
- if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} ${_libraries_found}) - set(CMAKE_REQUIRED_DEFINITIONS "") - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - check_function_exists(${_name} ${_prefix}_${_name}${_combined_name}_WORKS) - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}_${_name}${_combined_name}_WORKS}) - endif() - - # on failure - if(NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} FALSE) - endif() - #message("DEBUG: ${DEFINITIONS} = ${${DEFINITIONS}}") - #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") -endmacro() - - -# -# main -# - -# LAPACK requires BLAS -if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - find_dependency(BLAS) -else() - find_dependency(BLAS REQUIRED) -endif() - -if (NOT BLAS_FOUND) - - message(STATUS "LAPACK requires BLAS.") - set(LAPACK_FOUND FALSE) - -# Is it already configured? -elseif (LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) - - set(LAPACK_FOUND TRUE) - -else() - - # reset variables - set( LAPACK_INCLUDE_DIR "" ) - set( LAPACK_DEFINITIONS "" ) - set( LAPACK_LINKER_FLAGS "" ) # unused (yet) - set( LAPACK_LIBRARIES "" ) - set( LAPACK_LIBRARIES_DIR "" ) - - # - # If Unix, search for LAPACK function in possible libraries - # - - #intel mkl lapack? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "mkl_lapack" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - #acml lapack? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "acml" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - # Apple LAPACK library? - if(NOT LAPACK_LIBRARIES) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "Accelerate" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - if ( NOT LAPACK_LIBRARIES ) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "vecLib" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif () - - # Generic LAPACK library? - # This configuration *must* be the last try as this library is notably slow. - if ( NOT LAPACK_LIBRARIES ) - check_lapack_libraries( - LAPACK_DEFINITIONS - LAPACK_LIBRARIES - LAPACK - cheev - "" - "lapack" - "${BLAS_LIBRARIES}" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV LAPACK_LIB_DIR" - ) - endif() - - if(LAPACK_LIBRARIES_DIR OR LAPACK_LIBRARIES) - set(LAPACK_FOUND TRUE) - else() - set(LAPACK_FOUND FALSE) - endif() - - if(NOT LAPACK_FIND_QUIETLY) - if(LAPACK_FOUND) - message(STATUS "A library with LAPACK API found.") - else() - if(LAPACK_FIND_REQUIRED) - message(FATAL_ERROR "A required library with LAPACK API not found. Please specify library location.") - else() - message(STATUS "A library with LAPACK API not found. 
Please specify library location.") - endif() - endif() - endif() - - # Add variables to cache - set( LAPACK_INCLUDE_DIR "${LAPACK_INCLUDE_DIR}" - CACHE PATH "Directories containing the LAPACK header files" FORCE ) - set( LAPACK_DEFINITIONS "${LAPACK_DEFINITIONS}" - CACHE STRING "Compilation options to use LAPACK" FORCE ) - set( LAPACK_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}" - CACHE STRING "Linker flags to use LAPACK" FORCE ) - set( LAPACK_LIBRARIES "${LAPACK_LIBRARIES}" - CACHE FILEPATH "LAPACK libraries name" FORCE ) - set( LAPACK_LIBRARIES_DIR "${LAPACK_LIBRARIES_DIR}" - CACHE PATH "Directories containing the LAPACK libraries" FORCE ) - - #message("DEBUG: LAPACK_INCLUDE_DIR = ${LAPACK_INCLUDE_DIR}") - #message("DEBUG: LAPACK_DEFINITIONS = ${LAPACK_DEFINITIONS}") - #message("DEBUG: LAPACK_LINKER_FLAGS = ${LAPACK_LINKER_FLAGS}") - #message("DEBUG: LAPACK_LIBRARIES = ${LAPACK_LIBRARIES}") - #message("DEBUG: LAPACK_LIBRARIES_DIR = ${LAPACK_LIBRARIES_DIR}") - #message("DEBUG: LAPACK_FOUND = ${LAPACK_FOUND}") - -endif() diff --git a/external/eigen-3.4.0/cmake/FindMPFR.cmake b/external/eigen-3.4.0/cmake/FindMPFR.cmake deleted file mode 100644 index d8da9d6..0000000 --- a/external/eigen-3.4.0/cmake/FindMPFR.cmake +++ /dev/null @@ -1,83 +0,0 @@ -# Try to find the MPFR library -# See http://www.mpfr.org/ -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(MPFR 2.3.0) -# to require version 2.3.0 to newer of MPFR. -# -# Once done this will define -# -# MPFR_FOUND - system has MPFR lib with correct version -# MPFR_INCLUDES - the MPFR include directory -# MPFR_LIBRARIES - the MPFR library -# MPFR_VERSION - MPFR version - -# Copyright (c) 2006, 2007 Montel Laurent, -# Copyright (c) 2008, 2009 Gael Guennebaud, -# Copyright (c) 2010 Jitse Niesen, -# Redistribution and use is allowed according to the terms of the BSD license. 
- -# Set MPFR_INCLUDES - -find_path(MPFR_INCLUDES - NAMES - mpfr.h - PATHS - $ENV{GMPDIR} - ${INCLUDE_INSTALL_DIR} -) - -# Set MPFR_FIND_VERSION to 1.0.0 if no minimum version is specified - -if(NOT MPFR_FIND_VERSION) - if(NOT MPFR_FIND_VERSION_MAJOR) - set(MPFR_FIND_VERSION_MAJOR 1) - endif() - if(NOT MPFR_FIND_VERSION_MINOR) - set(MPFR_FIND_VERSION_MINOR 0) - endif() - if(NOT MPFR_FIND_VERSION_PATCH) - set(MPFR_FIND_VERSION_PATCH 0) - endif() - - set(MPFR_FIND_VERSION "${MPFR_FIND_VERSION_MAJOR}.${MPFR_FIND_VERSION_MINOR}.${MPFR_FIND_VERSION_PATCH}") -endif() - - -if(MPFR_INCLUDES) - - # Set MPFR_VERSION - - file(READ "${MPFR_INCLUDES}/mpfr.h" _mpfr_version_header) - - string(REGEX MATCH "define[ \t]+MPFR_VERSION_MAJOR[ \t]+([0-9]+)" _mpfr_major_version_match "${_mpfr_version_header}") - set(MPFR_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+MPFR_VERSION_MINOR[ \t]+([0-9]+)" _mpfr_minor_version_match "${_mpfr_version_header}") - set(MPFR_MINOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+MPFR_VERSION_PATCHLEVEL[ \t]+([0-9]+)" _mpfr_patchlevel_version_match "${_mpfr_version_header}") - set(MPFR_PATCHLEVEL_VERSION "${CMAKE_MATCH_1}") - - set(MPFR_VERSION ${MPFR_MAJOR_VERSION}.${MPFR_MINOR_VERSION}.${MPFR_PATCHLEVEL_VERSION}) - - # Check whether found version exceeds minimum version - - if(${MPFR_VERSION} VERSION_LESS ${MPFR_FIND_VERSION}) - set(MPFR_VERSION_OK FALSE) - message(STATUS "MPFR version ${MPFR_VERSION} found in ${MPFR_INCLUDES}, " - "but at least version ${MPFR_FIND_VERSION} is required") - else() - set(MPFR_VERSION_OK TRUE) - endif() - -endif() - -# Set MPFR_LIBRARIES - -find_library(MPFR_LIBRARIES mpfr PATHS $ENV{GMPDIR} ${LIB_INSTALL_DIR}) - -# Epilogue - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(MPFR DEFAULT_MSG - MPFR_INCLUDES MPFR_LIBRARIES MPFR_VERSION_OK) -mark_as_advanced(MPFR_INCLUDES MPFR_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindMPREAL.cmake b/external/eigen-3.4.0/cmake/FindMPREAL.cmake deleted file mode 100644 index 947a1ce..0000000 --- a/external/eigen-3.4.0/cmake/FindMPREAL.cmake +++ /dev/null @@ -1,103 +0,0 @@ -# Try to find the MPFR C++ (MPREAL) library -# See http://www.holoborodko.com/pavel/mpreal/ -# -# This module supports requiring a minimum version, e.g. you can do -# find_package(MPREAL 1.8.6) -# to require version 1.8.6 or newer of MPREAL C++. -# -# Once done this will define -# -# MPREAL_FOUND - system has MPREAL lib with correct version -# MPREAL_INCLUDES - MPREAL required include directories -# MPREAL_LIBRARIES - MPREAL required libraries -# MPREAL_VERSION - MPREAL version - -# Copyright (c) 2020 The Eigen Authors. -# Redistribution and use is allowed according to the terms of the BSD license. 
-
-include(CMakeFindDependencyMacro)
-find_dependency(MPFR)
-find_dependency(GMP)
-
-# Set MPREAL_INCLUDES
-find_path(MPREAL_INCLUDES
-  NAMES
-  mpreal.h
-  PATHS
-  $ENV{GMPDIR}
-  ${INCLUDE_INSTALL_DIR}
-)
-
-# Set MPREAL_FIND_VERSION to 1.0.0 if no minimum version is specified
-
-if(NOT MPREAL_FIND_VERSION)
-  if(NOT MPREAL_FIND_VERSION_MAJOR)
-    set(MPREAL_FIND_VERSION_MAJOR 1)
-  endif()
-  if(NOT MPREAL_FIND_VERSION_MINOR)
-    set(MPREAL_FIND_VERSION_MINOR 0)
-  endif()
-  if(NOT MPREAL_FIND_VERSION_PATCH)
-    set(MPREAL_FIND_VERSION_PATCH 0)
-  endif()
-
-  set(MPREAL_FIND_VERSION "${MPREAL_FIND_VERSION_MAJOR}.${MPREAL_FIND_VERSION_MINOR}.${MPREAL_FIND_VERSION_PATCH}")
-endif()
-
-# Check bugs
-# - https://github.com/advanpix/mpreal/issues/7
-# - https://github.com/advanpix/mpreal/issues/9
-set(MPREAL_TEST_PROGRAM "
-#include <mpreal.h>
-#include <algorithm>
-int main(int argc, char** argv) {
-  const mpfr::mpreal one = 1.0;
-  const mpfr::mpreal zero = 0.0;
-  using namespace std;
-  const mpfr::mpreal smaller = min(one, zero);
-  return 0;
-}")
-
-if(MPREAL_INCLUDES)
-
-  # Set MPREAL_VERSION
-
-  file(READ "${MPREAL_INCLUDES}/mpreal.h" _mpreal_version_header)
-
-  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MAJOR[ \t]+([0-9]+)" _mpreal_major_version_match "${_mpreal_version_header}")
-  set(MPREAL_MAJOR_VERSION "${CMAKE_MATCH_1}")
-  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MINOR[ \t]+([0-9]+)" _mpreal_minor_version_match "${_mpreal_version_header}")
-  set(MPREAL_MINOR_VERSION "${CMAKE_MATCH_1}")
-  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_PATCHLEVEL[ \t]+([0-9]+)" _mpreal_patchlevel_version_match "${_mpreal_version_header}")
-  set(MPREAL_PATCHLEVEL_VERSION "${CMAKE_MATCH_1}")
-
-  set(MPREAL_VERSION ${MPREAL_MAJOR_VERSION}.${MPREAL_MINOR_VERSION}.${MPREAL_PATCHLEVEL_VERSION})
-
-  # Check whether found version exceeds minimum version
-
-  if(${MPREAL_VERSION} VERSION_LESS ${MPREAL_FIND_VERSION})
-    set(MPREAL_VERSION_OK FALSE)
-    message(STATUS "MPREAL version ${MPREAL_VERSION} found in ${MPREAL_INCLUDES}, "
-                   "but at least version ${MPREAL_FIND_VERSION} is required")
-  else()
-    set(MPREAL_VERSION_OK TRUE)
-
-    list(APPEND MPREAL_INCLUDES "${MPFR_INCLUDES}" "${GMP_INCLUDES}")
-    list(REMOVE_DUPLICATES MPREAL_INCLUDES)
-
-    list(APPEND MPREAL_LIBRARIES "${MPFR_LIBRARIES}" "${GMP_LIBRARIES}")
-    list(REMOVE_DUPLICATES MPREAL_LIBRARIES)
-
-    # Make sure it compiles with the current compiler.
-    unset(MPREAL_WORKS CACHE)
-    include(CheckCXXSourceCompiles)
-    set(CMAKE_REQUIRED_INCLUDES "${MPREAL_INCLUDES}")
-    set(CMAKE_REQUIRED_LIBRARIES "${MPREAL_LIBRARIES}")
-    check_cxx_source_compiles("${MPREAL_TEST_PROGRAM}" MPREAL_WORKS)
-  endif()
-endif()
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(MPREAL DEFAULT_MSG
-                                  MPREAL_INCLUDES MPREAL_VERSION_OK MPREAL_WORKS)
-mark_as_advanced(MPREAL_INCLUDES)
diff --git a/external/eigen-3.4.0/cmake/FindMetis.cmake b/external/eigen-3.4.0/cmake/FindMetis.cmake
deleted file mode 100644
index 747f882..0000000
--- a/external/eigen-3.4.0/cmake/FindMetis.cmake
+++ /dev/null
@@ -1,265 +0,0 @@
-###
-#
-# @copyright (c) 2009-2014 The University of Tennessee and The University
-#                          of Tennessee Research Foundation.
-#                          All rights reserved.
-# @copyright (c) 2012-2014 Inria. All rights reserved.
-# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
-# -### -# -# - Find METIS include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(METIS -# [REQUIRED] # Fail with error if metis is not found -# ) -# -# This module finds headers and metis library. -# Results are reported in variables: -# METIS_FOUND - True if headers and requested libraries were found -# METIS_INCLUDE_DIRS - metis include directories -# METIS_LIBRARY_DIRS - Link directories for metis libraries -# METIS_LIBRARIES - metis component libraries to be linked -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DMETIS_DIR=path/to/metis): -# METIS_DIR - Where to find the base directory of metis -# METIS_INCDIR - Where to find the header files -# METIS_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: METIS_DIR, METIS_INCDIR, METIS_LIBDIR - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) - -if (NOT METIS_FOUND) - set(METIS_DIR "" CACHE PATH "Installation directory of METIS library") - if (NOT METIS_FIND_QUIETLY) - message(STATUS "A cache variable, namely METIS_DIR, has been set to specify the install directory of METIS") - endif() -endif() - -# Looking for include -# ------------------- - -# Add system include paths to search include -# ------------------------------------------ -unset(_inc_env) -set(ENV_METIS_DIR "$ENV{METIS_DIR}") -set(ENV_METIS_INCDIR "$ENV{METIS_INCDIR}") -if(ENV_METIS_INCDIR) - list(APPEND _inc_env "${ENV_METIS_INCDIR}") -elseif(ENV_METIS_DIR) - list(APPEND _inc_env "${ENV_METIS_DIR}") - list(APPEND _inc_env "${ENV_METIS_DIR}/include") - list(APPEND _inc_env "${ENV_METIS_DIR}/include/metis") -else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() -endif() -list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") -list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") -list(REMOVE_DUPLICATES _inc_env) - - -# Try to find the metis header in the given paths -# ------------------------------------------------- -# call cmake macro to find the header path -if(METIS_INCDIR) - set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") - find_path(METIS_metis.h_DIRS - NAMES metis.h - HINTS ${METIS_INCDIR}) -else() - if(METIS_DIR) - set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") - find_path(METIS_metis.h_DIRS - NAMES 
metis.h - HINTS ${METIS_DIR} - PATH_SUFFIXES "include" "include/metis") - else() - set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") - find_path(METIS_metis.h_DIRS - NAMES metis.h - HINTS ${_inc_env}) - endif() -endif() -mark_as_advanced(METIS_metis.h_DIRS) - - -# If found, add path to cmake variable -# ------------------------------------ -if (METIS_metis.h_DIRS) - set(METIS_INCLUDE_DIRS "${METIS_metis.h_DIRS}") -else () - set(METIS_INCLUDE_DIRS "METIS_INCLUDE_DIRS-NOTFOUND") - if(NOT METIS_FIND_QUIETLY) - message(STATUS "Looking for metis -- metis.h not found") - endif() -endif() - - -# Looking for lib -# --------------- - -# Add system library paths to search lib -# -------------------------------------- -unset(_lib_env) -set(ENV_METIS_LIBDIR "$ENV{METIS_LIBDIR}") -if(ENV_METIS_LIBDIR) - list(APPEND _lib_env "${ENV_METIS_LIBDIR}") -elseif(ENV_METIS_DIR) - list(APPEND _lib_env "${ENV_METIS_DIR}") - list(APPEND _lib_env "${ENV_METIS_DIR}/lib") -else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() -endif() -list(REMOVE_DUPLICATES _lib_env) - -# Try to find the metis lib in the given paths -# ---------------------------------------------- -# call cmake macro to find the lib path -if(METIS_LIBDIR) - set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") - find_library(METIS_metis_LIBRARY - NAMES metis - HINTS ${METIS_LIBDIR}) -else() - if(METIS_DIR) - set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") - find_library(METIS_metis_LIBRARY - NAMES metis - HINTS ${METIS_DIR} - PATH_SUFFIXES lib lib32 lib64) - else() - set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") - find_library(METIS_metis_LIBRARY - NAMES metis - HINTS ${_lib_env}) - endif() -endif() -mark_as_advanced(METIS_metis_LIBRARY) - - -# If found, add path to cmake variable -# ------------------------------------ -if (METIS_metis_LIBRARY) - get_filename_component(metis_lib_path "${METIS_metis_LIBRARY}" PATH) - # set cmake variables - set(METIS_LIBRARIES "${METIS_metis_LIBRARY}") - set(METIS_LIBRARY_DIRS "${metis_lib_path}") -else () - set(METIS_LIBRARIES "METIS_LIBRARIES-NOTFOUND") - set(METIS_LIBRARY_DIRS "METIS_LIBRARY_DIRS-NOTFOUND") - if(NOT METIS_FIND_QUIETLY) - message(STATUS "Looking for metis -- lib metis not found") - endif() -endif () - -# check a function to validate the find -if(METIS_LIBRARIES) - - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - set(REQUIRED_LIBS) - - # METIS - if (METIS_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") - endif() - if (METIS_LIBRARY_DIRS) - set(REQUIRED_LIBDIRS "${METIS_LIBRARY_DIRS}") - endif() - set(REQUIRED_LIBS "${METIS_LIBRARIES}") - # m - find_library(M_LIBRARY NAMES m) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - list(APPEND REQUIRED_LIBS "-lm") - endif() - - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(METIS_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(METIS_NodeND 
METIS_WORKS) - mark_as_advanced(METIS_WORKS) - - if(NOT METIS_WORKS) - if(NOT METIS_FIND_QUIETLY) - message(STATUS "Looking for METIS : test of METIS_NodeND with METIS library fails") - message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") - message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") - message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") - endif() - endif() - set(CMAKE_REQUIRED_INCLUDES) - set(CMAKE_REQUIRED_FLAGS) - set(CMAKE_REQUIRED_LIBRARIES) -endif() - -if (METIS_LIBRARIES) - list(GET METIS_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "/lib(32|64)?$") - string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") - set(METIS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of METIS library" FORCE) - else() - set(METIS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of METIS library" FORCE) - endif() -endif() -mark_as_advanced(METIS_DIR) -mark_as_advanced(METIS_DIR_FOUND) - -# check that METIS has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(METIS DEFAULT_MSG - METIS_LIBRARIES - METIS_WORKS - METIS_INCLUDE_DIRS) -# -# TODO: Add possibility to check for specific functions in the library -# diff --git a/external/eigen-3.4.0/cmake/FindPASTIX.cmake b/external/eigen-3.4.0/cmake/FindPASTIX.cmake deleted file mode 100644 index db1427b..0000000 --- a/external/eigen-3.4.0/cmake/FindPASTIX.cmake +++ /dev/null @@ -1,704 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find PASTIX include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(PASTIX -# [REQUIRED] # Fail with error if pastix is not found -# [COMPONENTS ...] # dependencies -# ) -# -# PASTIX depends on the following libraries: -# - Threads, m, rt -# - MPI -# - HWLOC -# - BLAS -# -# COMPONENTS are optional libraries PASTIX could be linked with, -# Use it to drive detection of a specific compilation chain -# COMPONENTS can be some of the following: -# - MPI: to activate detection of the parallel MPI version (default) -# it looks for Threads, HWLOC, BLAS, MPI and ScaLAPACK libraries -# - SEQ: to activate detection of the sequential version (exclude MPI version) -# - STARPU: to activate detection of StarPU version -# it looks for MPI version of StarPU (default behaviour) -# if SEQ and STARPU are given, it looks for a StarPU without MPI -# - STARPU_CUDA: to activate detection of StarPU with CUDA -# - STARPU_FXT: to activate detection of StarPU with FxT -# - SCOTCH: to activate detection of PASTIX linked with SCOTCH -# - PTSCOTCH: to activate detection of PASTIX linked with SCOTCH -# - METIS: to activate detection of PASTIX linked with SCOTCH -# -# This module finds headers and pastix library. 
-# Results are reported in variables: -# PASTIX_FOUND - True if headers and requested libraries were found -# PASTIX_LINKER_FLAGS - list of required linker flags (excluding -l and -L) -# PASTIX_INCLUDE_DIRS - pastix include directories -# PASTIX_LIBRARY_DIRS - Link directories for pastix libraries -# PASTIX_LIBRARIES - pastix libraries -# PASTIX_INCLUDE_DIRS_DEP - pastix + dependencies include directories -# PASTIX_LIBRARY_DIRS_DEP - pastix + dependencies link directories -# PASTIX_LIBRARIES_DEP - pastix libraries + dependencies -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DPASTIX_DIR=path/to/pastix): -# PASTIX_DIR - Where to find the base directory of pastix -# PASTIX_INCDIR - Where to find the header files -# PASTIX_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: PASTIX_DIR, PASTIX_INCDIR, PASTIX_LIBDIR - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) - - -if (NOT PASTIX_FOUND) - set(PASTIX_DIR "" CACHE PATH "Installation directory of PASTIX library") - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "A cache variable, namely PASTIX_DIR, has been set to specify the install directory of PASTIX") - endif() -endif() - -# Set the version to find -set(PASTIX_LOOK_FOR_MPI ON) -set(PASTIX_LOOK_FOR_SEQ OFF) -set(PASTIX_LOOK_FOR_STARPU OFF) -set(PASTIX_LOOK_FOR_STARPU_CUDA OFF) -set(PASTIX_LOOK_FOR_STARPU_FXT OFF) -set(PASTIX_LOOK_FOR_SCOTCH ON) -set(PASTIX_LOOK_FOR_PTSCOTCH OFF) -set(PASTIX_LOOK_FOR_METIS OFF) - -if( PASTIX_FIND_COMPONENTS ) - foreach( component ${PASTIX_FIND_COMPONENTS} ) - if (${component} STREQUAL "SEQ") - # means we look for the sequential version of PaStiX (without MPI) - set(PASTIX_LOOK_FOR_SEQ ON) - set(PASTIX_LOOK_FOR_MPI OFF) - endif() - if (${component} STREQUAL "MPI") - # means we look for the MPI version of PaStiX (default) - set(PASTIX_LOOK_FOR_SEQ OFF) - set(PASTIX_LOOK_FOR_MPI ON) - endif() - if (${component} STREQUAL "STARPU") - # means we look for PaStiX with StarPU - set(PASTIX_LOOK_FOR_STARPU ON) - endif() - if (${component} STREQUAL "STARPU_CUDA") - # means we look for PaStiX with StarPU + CUDA - set(PASTIX_LOOK_FOR_STARPU ON) - set(PASTIX_LOOK_FOR_STARPU_CUDA ON) - endif() - if (${component} STREQUAL "STARPU_FXT") - # means we look for PaStiX with StarPU + FxT - set(PASTIX_LOOK_FOR_STARPU_FXT ON) - endif() - if (${component} STREQUAL "SCOTCH") - set(PASTIX_LOOK_FOR_SCOTCH ON) - endif() - if (${component} STREQUAL "PTSCOTCH") - set(PASTIX_LOOK_FOR_PTSCOTCH ON) - endif() - if (${component} STREQUAL "METIS") - set(PASTIX_LOOK_FOR_METIS ON) - endif() - endforeach() -endif() - -# Dependencies detection -# ---------------------- - - -# Required dependencies -# 
--------------------- -include(CMakeFindDependencyMacro) -if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect pthread") -endif() -if (PASTIX_FIND_REQUIRED) - find_dependency(Threads REQUIRED QUIET) -else() - find_dependency(Threads QUIET) -endif() -set(PASTIX_EXTRA_LIBRARIES "") -if( THREADS_FOUND ) - list(APPEND PASTIX_EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) -endif () - -# Add math library to the list of extra -# it normally exists on all common systems provided with a C compiler -if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect libm") -endif() -set(PASTIX_M_LIBRARIES "") -if(UNIX OR WIN32) - find_library( - PASTIX_M_m_LIBRARY - NAMES m - ) - mark_as_advanced(PASTIX_M_m_LIBRARY) - if (PASTIX_M_m_LIBRARY) - list(APPEND PASTIX_M_LIBRARIES "${PASTIX_M_m_LIBRARY}") - list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_M_m_LIBRARY}") - else() - if (PASTIX_FIND_REQUIRED) - message(FATAL_ERROR "Could NOT find libm on your system." - "Are you sure to a have a C compiler installed?") - endif() - endif() -endif() - -# Try to find librt (libposix4 - POSIX.1b Realtime Extensions library) -# on Unix systems except Apple ones because it does not exist on it -if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect librt") -endif() -set(PASTIX_RT_LIBRARIES "") -if(UNIX AND NOT APPLE) - find_library( - PASTIX_RT_rt_LIBRARY - NAMES rt - ) - mark_as_advanced(PASTIX_RT_rt_LIBRARY) - if (PASTIX_RT_rt_LIBRARY) - list(APPEND PASTIX_RT_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") - list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") - else() - if (PASTIX_FIND_REQUIRED) - message(FATAL_ERROR "Could NOT find librt on your system") - endif() - endif() -endif() - -# PASTIX depends on HWLOC -#------------------------ -if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect HWLOC") -endif() -if (PASTIX_FIND_REQUIRED) - find_dependency(HWLOC REQUIRED QUIET) -else() - find_dependency(HWLOC QUIET) -endif() - -# PASTIX depends on BLAS -#----------------------- -if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect BLAS") -endif() -if (PASTIX_FIND_REQUIRED) - find_dependency(BLASEXT REQUIRED QUIET) -else() - find_dependency(BLASEXT QUIET) -endif() - -# Optional dependencies -# --------------------- - -# PASTIX may depend on MPI -#------------------------- -if (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect MPI") - endif() - # allows to use an external mpi compilation by setting compilers with - # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 - # at cmake configure - if(NOT MPI_C_COMPILER) - set(MPI_C_COMPILER mpicc) - endif() - if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_MPI) - find_dependency(MPI REQUIRED QUIET) - else() - find_dependency(MPI QUIET) - endif() - if (MPI_FOUND) - mark_as_advanced(MPI_LIBRARY) - mark_as_advanced(MPI_EXTRA_LIBRARY) - endif() -endif () - -# PASTIX may depend on STARPU -#---------------------------- -if( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) - - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect StarPU") - endif() - - set(PASTIX_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired") - - # create list of components in order to make a single call to find_package(starpu...) 
- # we explicitly need a StarPU version built with hwloc - set(STARPU_COMPONENT_LIST "HWLOC") - - # StarPU may depend on MPI - # allows to use an external mpi compilation by setting compilers with - # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 - # at cmake configure - if (PASTIX_LOOK_FOR_MPI) - if(NOT MPI_C_COMPILER) - set(MPI_C_COMPILER mpicc) - endif() - list(APPEND STARPU_COMPONENT_LIST "MPI") - endif() - if (PASTIX_LOOK_FOR_STARPU_CUDA) - list(APPEND STARPU_COMPONENT_LIST "CUDA") - endif() - if (PASTIX_LOOK_FOR_STARPU_FXT) - list(APPEND STARPU_COMPONENT_LIST "FXT") - endif() - # set the list of optional dependencies we may discover - if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_STARPU) - find_dependency(STARPU ${PASTIX_STARPU_VERSION} REQUIRED - COMPONENTS ${STARPU_COMPONENT_LIST}) - else() - find_dependency(STARPU ${PASTIX_STARPU_VERSION} - COMPONENTS ${STARPU_COMPONENT_LIST}) - endif() - -endif() - -# PASTIX may depends on SCOTCH -#----------------------------- -if (NOT SCOTCH_FOUND AND PASTIX_LOOK_FOR_SCOTCH) - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect SCOTCH") - endif() - if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_SCOTCH) - find_dependency(SCOTCH REQUIRED QUIET) - else() - find_dependency(SCOTCH QUIET) - endif() -endif() - -# PASTIX may depends on PTSCOTCH -#------------------------------- -if (NOT PTSCOTCH_FOUND AND PASTIX_LOOK_FOR_PTSCOTCH) - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect PTSCOTCH") - endif() - if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_PTSCOTCH) - find_dependency(PTSCOTCH REQUIRED QUIET) - else() - find_dependency(PTSCOTCH QUIET) - endif() -endif() - -# PASTIX may depends on METIS -#---------------------------- -if (NOT METIS_FOUND AND PASTIX_LOOK_FOR_METIS) - if (NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX - Try to detect METIS") - endif() - if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_METIS) - find_dependency(METIS REQUIRED QUIET) - else() - find_dependency(METIS QUIET) - endif() -endif() - -# Error if pastix required and no partitioning lib found -if (PASTIX_FIND_REQUIRED AND NOT SCOTCH_FOUND AND NOT PTSCOTCH_FOUND AND NOT METIS_FOUND) - message(FATAL_ERROR "Could NOT find any partitioning library on your system" - " (install scotch, ptscotch or metis)") -endif() - - -# Looking for PaStiX -# ------------------ - -# Looking for include -# ------------------- - -# Add system include paths to search include -# ------------------------------------------ -unset(_inc_env) -set(ENV_PASTIX_DIR "$ENV{PASTIX_DIR}") -set(ENV_PASTIX_INCDIR "$ENV{PASTIX_INCDIR}") -if(ENV_PASTIX_INCDIR) - list(APPEND _inc_env "${ENV_PASTIX_INCDIR}") -elseif(ENV_PASTIX_DIR) - list(APPEND _inc_env "${ENV_PASTIX_DIR}") - list(APPEND _inc_env "${ENV_PASTIX_DIR}/include") - list(APPEND _inc_env "${ENV_PASTIX_DIR}/include/pastix") -else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() -endif() -list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") -list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") -list(REMOVE_DUPLICATES 
_inc_env) - - -# Try to find the pastix header in the given paths -# --------------------------------------------------- -# call cmake macro to find the header path -if(PASTIX_INCDIR) - set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") - find_path(PASTIX_pastix.h_DIRS - NAMES pastix.h - HINTS ${PASTIX_INCDIR}) -else() - if(PASTIX_DIR) - set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") - find_path(PASTIX_pastix.h_DIRS - NAMES pastix.h - HINTS ${PASTIX_DIR} - PATH_SUFFIXES "include" "include/pastix") - else() - set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") - find_path(PASTIX_pastix.h_DIRS - NAMES pastix.h - HINTS ${_inc_env} - PATH_SUFFIXES "pastix") - endif() -endif() -mark_as_advanced(PASTIX_pastix.h_DIRS) - -# If found, add path to cmake variable -# ------------------------------------ -if (PASTIX_pastix.h_DIRS) - set(PASTIX_INCLUDE_DIRS "${PASTIX_pastix.h_DIRS}") -else () - set(PASTIX_INCLUDE_DIRS "PASTIX_INCLUDE_DIRS-NOTFOUND") - if(NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for pastix -- pastix.h not found") - endif() -endif() - - -# Looking for lib -# --------------- - -# Add system library paths to search lib -# -------------------------------------- -unset(_lib_env) -set(ENV_PASTIX_LIBDIR "$ENV{PASTIX_LIBDIR}") -if(ENV_PASTIX_LIBDIR) - list(APPEND _lib_env "${ENV_PASTIX_LIBDIR}") -elseif(ENV_PASTIX_DIR) - list(APPEND _lib_env "${ENV_PASTIX_DIR}") - list(APPEND _lib_env "${ENV_PASTIX_DIR}/lib") -else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() -endif() -list(REMOVE_DUPLICATES _lib_env) - -# Try to find the pastix lib in the given paths -# ------------------------------------------------ - -# create list of libs to find -set(PASTIX_libs_to_find "pastix_murge;pastix") - -# call cmake macro to find the lib path -if(PASTIX_LIBDIR) - foreach(pastix_lib ${PASTIX_libs_to_find}) - set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") - find_library(PASTIX_${pastix_lib}_LIBRARY - NAMES ${pastix_lib} - HINTS ${PASTIX_LIBDIR}) - endforeach() -else() - if(PASTIX_DIR) - foreach(pastix_lib ${PASTIX_libs_to_find}) - set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") - find_library(PASTIX_${pastix_lib}_LIBRARY - NAMES ${pastix_lib} - HINTS ${PASTIX_DIR} - PATH_SUFFIXES lib lib32 lib64) - endforeach() - else() - foreach(pastix_lib ${PASTIX_libs_to_find}) - set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") - find_library(PASTIX_${pastix_lib}_LIBRARY - NAMES ${pastix_lib} - HINTS ${_lib_env}) - endforeach() - endif() -endif() - -# If found, add path to cmake variable -# ------------------------------------ -foreach(pastix_lib ${PASTIX_libs_to_find}) - - get_filename_component(${pastix_lib}_lib_path ${PASTIX_${pastix_lib}_LIBRARY} PATH) - # set cmake variables (respects naming convention) - if (PASTIX_LIBRARIES) - list(APPEND PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") - else() - set(PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") - endif() - if (PASTIX_LIBRARY_DIRS) - list(APPEND PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") - else() - set(PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") - endif() - mark_as_advanced(PASTIX_${pastix_lib}_LIBRARY) - -endforeach() - -# check 
a function to validate the find -if(PASTIX_LIBRARIES) - - set(REQUIRED_LDFLAGS) - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - set(REQUIRED_LIBS) - - # PASTIX - if (PASTIX_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${PASTIX_INCLUDE_DIRS}") - endif() - foreach(libdir ${PASTIX_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - set(REQUIRED_LIBS "${PASTIX_LIBRARIES}") - # STARPU - if (PASTIX_LOOK_FOR_STARPU AND STARPU_FOUND) - if (STARPU_INCLUDE_DIRS_DEP) - list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS_DEP}") - elseif (STARPU_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS}") - endif() - if(STARPU_LIBRARY_DIRS_DEP) - list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS_DEP}") - elseif(STARPU_LIBRARY_DIRS) - list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS}") - endif() - if (STARPU_LIBRARIES_DEP) - list(APPEND REQUIRED_LIBS "${STARPU_LIBRARIES_DEP}") - elseif (STARPU_LIBRARIES) - foreach(lib ${STARPU_LIBRARIES}) - if (EXISTS ${lib} OR ${lib} MATCHES "^-") - list(APPEND REQUIRED_LIBS "${lib}") - else() - list(APPEND REQUIRED_LIBS "-l${lib}") - endif() - endforeach() - endif() - endif() - # CUDA - if (PASTIX_LOOK_FOR_STARPU_CUDA AND CUDA_FOUND) - if (CUDA_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${CUDA_INCLUDE_DIRS}") - endif() - foreach(libdir ${CUDA_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - list(APPEND REQUIRED_LIBS "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES}") - endif() - # MPI - if (PASTIX_LOOK_FOR_MPI AND MPI_FOUND) - if (MPI_C_INCLUDE_PATH) - list(APPEND REQUIRED_INCDIRS "${MPI_C_INCLUDE_PATH}") - endif() - if (MPI_C_LINK_FLAGS) - if (${MPI_C_LINK_FLAGS} MATCHES " -") - string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) - endif() - list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") - endif() - list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") - endif() - # HWLOC - if (HWLOC_FOUND) - if (HWLOC_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") - endif() - foreach(libdir ${HWLOC_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - foreach(lib ${HWLOC_LIBRARIES}) - if (EXISTS ${lib} OR ${lib} MATCHES "^-") - list(APPEND REQUIRED_LIBS "${lib}") - else() - list(APPEND REQUIRED_LIBS "-l${lib}") - endif() - endforeach() - endif() - # BLAS - if (BLAS_FOUND) - if (BLAS_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") - endif() - foreach(libdir ${BLAS_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") - if (BLAS_LINKER_FLAGS) - list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") - endif() - endif() - # SCOTCH - if (PASTIX_LOOK_FOR_SCOTCH AND SCOTCH_FOUND) - if (SCOTCH_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") - endif() - foreach(libdir ${SCOTCH_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - list(APPEND REQUIRED_LIBS "${SCOTCH_LIBRARIES}") - endif() - # PTSCOTCH - if (PASTIX_LOOK_FOR_PTSCOTCH AND PTSCOTCH_FOUND) - if (PTSCOTCH_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") - endif() - foreach(libdir ${PTSCOTCH_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - list(APPEND REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") - endif() - # METIS - if (PASTIX_LOOK_FOR_METIS AND METIS_FOUND) - if (METIS_INCLUDE_DIRS) - list(APPEND 
REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") - endif() - foreach(libdir ${METIS_LIBRARY_DIRS}) - if (libdir) - list(APPEND REQUIRED_LIBDIRS "${libdir}") - endif() - endforeach() - list(APPEND REQUIRED_LIBS "${METIS_LIBRARIES}") - endif() - # Fortran - if (CMAKE_C_COMPILER_ID MATCHES "GNU") - find_library( - FORTRAN_gfortran_LIBRARY - NAMES gfortran - HINTS ${_lib_env} - ) - mark_as_advanced(FORTRAN_gfortran_LIBRARY) - if (FORTRAN_gfortran_LIBRARY) - list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") - endif() - elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") - find_library( - FORTRAN_ifcore_LIBRARY - NAMES ifcore - HINTS ${_lib_env} - ) - mark_as_advanced(FORTRAN_ifcore_LIBRARY) - if (FORTRAN_ifcore_LIBRARY) - list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") - endif() - endif() - # EXTRA LIBS such that pthread, m, rt - list(APPEND REQUIRED_LIBS ${PASTIX_EXTRA_LIBRARIES}) - - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") - string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(PASTIX_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(pastix PASTIX_WORKS) - mark_as_advanced(PASTIX_WORKS) - - if(PASTIX_WORKS) - # save link with dependencies - set(PASTIX_LIBRARIES_DEP "${REQUIRED_LIBS}") - set(PASTIX_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") - set(PASTIX_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") - set(PASTIX_LINKER_FLAGS "${REQUIRED_LDFLAGS}") - list(REMOVE_DUPLICATES PASTIX_LIBRARY_DIRS_DEP) - list(REMOVE_DUPLICATES PASTIX_INCLUDE_DIRS_DEP) - list(REMOVE_DUPLICATES PASTIX_LINKER_FLAGS) - else() - if(NOT PASTIX_FIND_QUIETLY) - message(STATUS "Looking for PASTIX : test of pastix() fails") - message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") - message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") - message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") - message(STATUS "Maybe PASTIX is linked with specific libraries. " - "Have you tried with COMPONENTS (MPI/SEQ, STARPU, STARPU_CUDA, SCOTCH, PTSCOTCH, METIS)? 
" - "See the explanation in FindPASTIX.cmake.") - endif() - endif() - set(CMAKE_REQUIRED_INCLUDES) - set(CMAKE_REQUIRED_FLAGS) - set(CMAKE_REQUIRED_LIBRARIES) -endif() - -if (PASTIX_LIBRARIES) - list(GET PASTIX_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "/lib(32|64)?$") - string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") - set(PASTIX_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PASTIX library" FORCE) - else() - set(PASTIX_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PASTIX library" FORCE) - endif() -endif() -mark_as_advanced(PASTIX_DIR) -mark_as_advanced(PASTIX_DIR_FOUND) - -# check that PASTIX has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PASTIX DEFAULT_MSG - PASTIX_LIBRARIES - PASTIX_WORKS) diff --git a/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake b/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake deleted file mode 100644 index 6ccc743..0000000 --- a/external/eigen-3.4.0/cmake/FindPTSCOTCH.cmake +++ /dev/null @@ -1,422 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2016 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find PTSCOTCH include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(PTSCOTCH -# [REQUIRED] # Fail with error if ptscotch is not found -# [COMPONENTS ...] # dependencies -# ) -# -# PTSCOTCH depends on the following libraries: -# - Threads -# - MPI -# -# COMPONENTS can be some of the following: -# - ESMUMPS: to activate detection of PT-Scotch with the esmumps interface -# -# This module finds headers and ptscotch library. -# Results are reported in variables: -# PTSCOTCH_FOUND - True if headers and requested libraries were found -# PTSCOTCH_LINKER_FLAGS - list of required linker flags (excluding -l and -L) -# PTSCOTCH_INCLUDE_DIRS - ptscotch include directories -# PTSCOTCH_LIBRARY_DIRS - Link directories for ptscotch libraries -# PTSCOTCH_LIBRARIES - ptscotch component libraries to be linked -# PTSCOTCH_INCLUDE_DIRS_DEP - ptscotch + dependencies include directories -# PTSCOTCH_LIBRARY_DIRS_DEP - ptscotch + dependencies link directories -# PTSCOTCH_LIBRARIES_DEP - ptscotch libraries + dependencies -# PTSCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DPTSCOTCH=path/to/ptscotch): -# PTSCOTCH_DIR - Where to find the base directory of ptscotch -# PTSCOTCH_INCDIR - Where to find the header files -# PTSCOTCH_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: PTSCOTCH_DIR, PTSCOTCH_INCDIR, PTSCOTCH_LIBDIR - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013-2016 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. 
-# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) - -if (NOT PTSCOTCH_FOUND) - set(PTSCOTCH_DIR "" CACHE PATH "Installation directory of PTSCOTCH library") - if (NOT PTSCOTCH_FIND_QUIETLY) - message(STATUS "A cache variable, namely PTSCOTCH_DIR, has been set to specify the install directory of PTSCOTCH") - endif() -endif() - -# Set the version to find -set(PTSCOTCH_LOOK_FOR_ESMUMPS OFF) - -if( PTSCOTCH_FIND_COMPONENTS ) - foreach( component ${PTSCOTCH_FIND_COMPONENTS} ) - if (${component} STREQUAL "ESMUMPS") - # means we look for esmumps library - set(PTSCOTCH_LOOK_FOR_ESMUMPS ON) - endif() - endforeach() -endif() - -# PTSCOTCH depends on Threads, try to find it -include(CMakeFindDependencyMacro) -if (NOT THREADS_FOUND) - if (PTSCOTCH_FIND_REQUIRED) - find_dependency(Threads REQUIRED) - else() - find_dependency(Threads) - endif() -endif() - -# PTSCOTCH depends on MPI, try to find it -if (NOT MPI_FOUND) - if (PTSCOTCH_FIND_REQUIRED) - find_dependency(MPI REQUIRED) - else() - find_dependency(MPI) - endif() -endif() - -# Looking for include -# ------------------- - -# Add system include paths to search include -# ------------------------------------------ -unset(_inc_env) -set(ENV_PTSCOTCH_DIR "$ENV{PTSCOTCH_DIR}") -set(ENV_PTSCOTCH_INCDIR "$ENV{PTSCOTCH_INCDIR}") -if(ENV_PTSCOTCH_INCDIR) - list(APPEND _inc_env "${ENV_PTSCOTCH_INCDIR}") -elseif(ENV_PTSCOTCH_DIR) - list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}") - list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include") - list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include/ptscotch") -else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() -endif() -list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") -list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") -list(REMOVE_DUPLICATES _inc_env) - - -# Try to find the ptscotch header in the given paths -# ------------------------------------------------- - -set(PTSCOTCH_hdrs_to_find "ptscotch.h;scotch.h") - -# call cmake macro to find the header path -if(PTSCOTCH_INCDIR) - foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) - set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") - find_path(PTSCOTCH_${ptscotch_hdr}_DIRS - NAMES ${ptscotch_hdr} - HINTS ${PTSCOTCH_INCDIR}) - mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) - endforeach() -else() - if(PTSCOTCH_DIR) - foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) - set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") - find_path(PTSCOTCH_${ptscotch_hdr}_DIRS - NAMES ${ptscotch_hdr} - HINTS ${PTSCOTCH_DIR} - PATH_SUFFIXES "include" "include/scotch") - mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) - endforeach() - else() - foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) - set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") - 
find_path(PTSCOTCH_${ptscotch_hdr}_DIRS - NAMES ${ptscotch_hdr} - HINTS ${_inc_env} - PATH_SUFFIXES "scotch") - mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) - endforeach() - endif() -endif() - -# If found, add path to cmake variable -# ------------------------------------ -foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) - if (PTSCOTCH_${ptscotch_hdr}_DIRS) - list(APPEND PTSCOTCH_INCLUDE_DIRS "${PTSCOTCH_${ptscotch_hdr}_DIRS}") - else () - if (NOT PTSCOTCH_FIND_QUIETLY) - message(STATUS "Looking for ptscotch -- ${ptscotch_hdr} not found") - endif() - endif() -endforeach() -list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS) - -# Looking for lib -# --------------- - -# Add system library paths to search lib -# -------------------------------------- -unset(_lib_env) -set(ENV_PTSCOTCH_LIBDIR "$ENV{PTSCOTCH_LIBDIR}") -if(ENV_PTSCOTCH_LIBDIR) - list(APPEND _lib_env "${ENV_PTSCOTCH_LIBDIR}") -elseif(ENV_PTSCOTCH_DIR) - list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}") - list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}/lib") -else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() -endif() -list(REMOVE_DUPLICATES _lib_env) - -# Try to find the ptscotch lib in the given paths -# ---------------------------------------------- - -set(PTSCOTCH_libs_to_find "ptscotch;ptscotcherr") -if (PTSCOTCH_LOOK_FOR_ESMUMPS) - list(INSERT PTSCOTCH_libs_to_find 0 "ptesmumps") - list(APPEND PTSCOTCH_libs_to_find "esmumps" ) -endif() -list(APPEND PTSCOTCH_libs_to_find "scotch;scotcherr") - -# call cmake macro to find the lib path -if(PTSCOTCH_LIBDIR) - foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) - set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") - find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY - NAMES ${ptscotch_lib} - HINTS ${PTSCOTCH_LIBDIR}) - endforeach() -else() - if(PTSCOTCH_DIR) - foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) - set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") - find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY - NAMES ${ptscotch_lib} - HINTS ${PTSCOTCH_DIR} - PATH_SUFFIXES lib lib32 lib64) - endforeach() - else() - foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) - set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") - find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY - NAMES ${ptscotch_lib} - HINTS ${_lib_env}) - endforeach() - endif() -endif() - -set(PTSCOTCH_LIBRARIES "") -set(PTSCOTCH_LIBRARY_DIRS "") -# If found, add path to cmake variable -# ------------------------------------ -foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) - - if (PTSCOTCH_${ptscotch_lib}_LIBRARY) - get_filename_component(${ptscotch_lib}_lib_path "${PTSCOTCH_${ptscotch_lib}_LIBRARY}" PATH) - # set cmake variables - list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") - list(APPEND PTSCOTCH_LIBRARY_DIRS "${${ptscotch_lib}_lib_path}") - else () - if (NOT PTSCOTCH_FIND_QUIETLY) - message(STATUS "Looking for ptscotch -- lib ${ptscotch_lib} not found") - endif() - endif () - - mark_as_advanced(PTSCOTCH_${ptscotch_lib}_LIBRARY) - -endforeach() -list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS) - -# check a function to validate the find -if(PTSCOTCH_LIBRARIES) - - set(REQUIRED_LDFLAGS) - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - 
set(REQUIRED_LIBS) - - # PTSCOTCH - if (PTSCOTCH_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") - endif() - if (PTSCOTCH_LIBRARY_DIRS) - set(REQUIRED_LIBDIRS "${PTSCOTCH_LIBRARY_DIRS}") - endif() - set(REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") - # MPI - if (MPI_FOUND) - if (MPI_C_INCLUDE_PATH) - list(APPEND CMAKE_REQUIRED_INCLUDES "${MPI_C_INCLUDE_PATH}") - endif() - if (MPI_C_LINK_FLAGS) - if (${MPI_C_LINK_FLAGS} MATCHES " -") - string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) - endif() - list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") - endif() - list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") - endif() - # THREADS - if(CMAKE_THREAD_LIBS_INIT) - list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") - endif() - set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") - find_library(Z_LIBRARY NAMES z) - mark_as_advanced(Z_LIBRARY) - if(Z_LIBRARY) - list(APPEND REQUIRED_LIBS "-lz") - endif() - set(M_LIBRARY "M_LIBRARY-NOTFOUND") - find_library(M_LIBRARY NAMES m) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - list(APPEND REQUIRED_LIBS "-lm") - endif() - set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") - find_library(RT_LIBRARY NAMES rt) - mark_as_advanced(RT_LIBRARY) - if(RT_LIBRARY) - list(APPEND REQUIRED_LIBS "-lrt") - endif() - - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") - string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(PTSCOTCH_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(SCOTCH_dgraphInit PTSCOTCH_WORKS) - mark_as_advanced(PTSCOTCH_WORKS) - - if(PTSCOTCH_WORKS) - # save link with dependencies - set(PTSCOTCH_LIBRARIES_DEP "${REQUIRED_LIBS}") - set(PTSCOTCH_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") - set(PTSCOTCH_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") - set(PTSCOTCH_LINKER_FLAGS "${REQUIRED_LDFLAGS}") - list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS_DEP) - list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS_DEP) - list(REMOVE_DUPLICATES PTSCOTCH_LINKER_FLAGS) - else() - if(NOT PTSCOTCH_FIND_QUIETLY) - message(STATUS "Looking for PTSCOTCH : test of SCOTCH_dgraphInit with PTSCOTCH library fails") - message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") - message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") - message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") - endif() - endif() - set(CMAKE_REQUIRED_INCLUDES) - set(CMAKE_REQUIRED_FLAGS) - set(CMAKE_REQUIRED_LIBRARIES) -endif() - -if (PTSCOTCH_LIBRARIES) - list(GET PTSCOTCH_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "/lib(32|64)?$") - string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") - set(PTSCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) - else() - set(PTSCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) - endif() -endif() -mark_as_advanced(PTSCOTCH_DIR) -mark_as_advanced(PTSCOTCH_DIR_FOUND) - -# Check the size of SCOTCH_Num -# --------------------------------- -set(CMAKE_REQUIRED_INCLUDES ${PTSCOTCH_INCLUDE_DIRS}) - -include(CheckCSourceRuns) 
-#stdio.h and stdint.h should be included by scotch.h directly -set(PTSCOTCH_C_TEST_SCOTCH_Num_4 " -#include -#include -#include -int main(int argc, char **argv) { - if (sizeof(SCOTCH_Num) == 4) - return 0; - else - return 1; -} -") - -set(PTSCOTCH_C_TEST_SCOTCH_Num_8 " -#include -#include -#include -int main(int argc, char **argv) { - if (sizeof(SCOTCH_Num) == 8) - return 0; - else - return 1; -} -") -check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_4}" PTSCOTCH_Num_4) -if(NOT PTSCOTCH_Num_4) - check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_8}" PTSCOTCH_Num_8) - if(NOT PTSCOTCH_Num_8) - set(PTSCOTCH_INTSIZE -1) - else() - set(PTSCOTCH_INTSIZE 8) - endif() -else() - set(PTSCOTCH_INTSIZE 4) -endif() -set(CMAKE_REQUIRED_INCLUDES "") - -# check that PTSCOTCH has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PTSCOTCH DEFAULT_MSG - PTSCOTCH_LIBRARIES - PTSCOTCH_WORKS) -# -# TODO: Add possibility to check for specific functions in the library -# diff --git a/external/eigen-3.4.0/cmake/FindSCOTCH.cmake b/external/eigen-3.4.0/cmake/FindSCOTCH.cmake deleted file mode 100644 index 11b971a..0000000 --- a/external/eigen-3.4.0/cmake/FindSCOTCH.cmake +++ /dev/null @@ -1,370 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find SCOTCH include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(SCOTCH -# [REQUIRED] # Fail with error if scotch is not found -# [COMPONENTS ...] # dependencies -# ) -# -# COMPONENTS can be some of the following: -# - ESMUMPS: to activate detection of Scotch with the esmumps interface -# -# This module finds headers and scotch library. -# Results are reported in variables: -# SCOTCH_FOUND - True if headers and requested libraries were found -# SCOTCH_INCLUDE_DIRS - scotch include directories -# SCOTCH_LIBRARY_DIRS - Link directories for scotch libraries -# SCOTCH_LIBRARIES - scotch component libraries to be linked -# SCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DSCOTCH=path/to/scotch): -# SCOTCH_DIR - Where to find the base directory of scotch -# SCOTCH_INCDIR - Where to find the header files -# SCOTCH_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: SCOTCH_DIR, SCOTCH_INCDIR, SCOTCH_LIBDIR - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. 
-#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) - -if (NOT SCOTCH_FOUND) - set(SCOTCH_DIR "" CACHE PATH "Installation directory of SCOTCH library") - if (NOT SCOTCH_FIND_QUIETLY) - message(STATUS "A cache variable, namely SCOTCH_DIR, has been set to specify the install directory of SCOTCH") - endif() -endif() - -# Set the version to find -set(SCOTCH_LOOK_FOR_ESMUMPS OFF) - -if( SCOTCH_FIND_COMPONENTS ) - foreach( component ${SCOTCH_FIND_COMPONENTS} ) - if (${component} STREQUAL "ESMUMPS") - # means we look for esmumps library - set(SCOTCH_LOOK_FOR_ESMUMPS ON) - endif() - endforeach() -endif() - -# SCOTCH may depend on Threads, try to find it -include(CMakeFindDependencyMacro) -if (NOT THREADS_FOUND) - if (SCOTCH_FIND_REQUIRED) - find_dependency(Threads REQUIRED) - else() - find_dependency(Threads) - endif() -endif() - -# Looking for include -# ------------------- - -# Add system include paths to search include -# ------------------------------------------ -unset(_inc_env) -set(ENV_SCOTCH_DIR "$ENV{SCOTCH_DIR}") -set(ENV_SCOTCH_INCDIR "$ENV{SCOTCH_INCDIR}") -if(ENV_SCOTCH_INCDIR) - list(APPEND _inc_env "${ENV_SCOTCH_INCDIR}") -elseif(ENV_SCOTCH_DIR) - list(APPEND _inc_env "${ENV_SCOTCH_DIR}") - list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include") - list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include/scotch") -else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() -endif() -list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") -list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") -list(REMOVE_DUPLICATES _inc_env) - - -# Try to find the scotch header in the given paths -# ------------------------------------------------- -# call cmake macro to find the header path -if(SCOTCH_INCDIR) - set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") - find_path(SCOTCH_scotch.h_DIRS - NAMES scotch.h - HINTS ${SCOTCH_INCDIR}) -else() - if(SCOTCH_DIR) - set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") - find_path(SCOTCH_scotch.h_DIRS - NAMES scotch.h - HINTS ${SCOTCH_DIR} - PATH_SUFFIXES "include" "include/scotch") - else() - set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") - find_path(SCOTCH_scotch.h_DIRS - NAMES scotch.h - HINTS ${_inc_env} - PATH_SUFFIXES "scotch") - endif() -endif() -mark_as_advanced(SCOTCH_scotch.h_DIRS) - -# If found, add path to cmake variable -# ------------------------------------ -if (SCOTCH_scotch.h_DIRS) - set(SCOTCH_INCLUDE_DIRS "${SCOTCH_scotch.h_DIRS}") -else () - set(SCOTCH_INCLUDE_DIRS "SCOTCH_INCLUDE_DIRS-NOTFOUND") - if (NOT SCOTCH_FIND_QUIETLY) - message(STATUS "Looking for scotch -- scotch.h not found") - endif() -endif() -list(REMOVE_DUPLICATES SCOTCH_INCLUDE_DIRS) - -# Looking for lib -# --------------- - -# Add system library paths to search lib -# -------------------------------------- -unset(_lib_env) -set(ENV_SCOTCH_LIBDIR "$ENV{SCOTCH_LIBDIR}") -if(ENV_SCOTCH_LIBDIR) - list(APPEND _lib_env "${ENV_SCOTCH_LIBDIR}") -elseif(ENV_SCOTCH_DIR) - list(APPEND _lib_env "${ENV_SCOTCH_DIR}") - 
list(APPEND _lib_env "${ENV_SCOTCH_DIR}/lib") -else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() -endif() -list(REMOVE_DUPLICATES _lib_env) - -# Try to find the scotch lib in the given paths -# ---------------------------------------------- - -set(SCOTCH_libs_to_find "scotch;scotcherrexit") -if (SCOTCH_LOOK_FOR_ESMUMPS) - list(INSERT SCOTCH_libs_to_find 0 "esmumps") -endif() - -# call cmake macro to find the lib path -if(SCOTCH_LIBDIR) - foreach(scotch_lib ${SCOTCH_libs_to_find}) - set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") - find_library(SCOTCH_${scotch_lib}_LIBRARY - NAMES ${scotch_lib} - HINTS ${SCOTCH_LIBDIR}) - endforeach() -else() - if(SCOTCH_DIR) - foreach(scotch_lib ${SCOTCH_libs_to_find}) - set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") - find_library(SCOTCH_${scotch_lib}_LIBRARY - NAMES ${scotch_lib} - HINTS ${SCOTCH_DIR} - PATH_SUFFIXES lib lib32 lib64) - endforeach() - else() - foreach(scotch_lib ${SCOTCH_libs_to_find}) - set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") - find_library(SCOTCH_${scotch_lib}_LIBRARY - NAMES ${scotch_lib} - HINTS ${_lib_env}) - endforeach() - endif() -endif() - -set(SCOTCH_LIBRARIES "") -set(SCOTCH_LIBRARY_DIRS "") -# If found, add path to cmake variable -# ------------------------------------ -foreach(scotch_lib ${SCOTCH_libs_to_find}) - - if (SCOTCH_${scotch_lib}_LIBRARY) - get_filename_component(${scotch_lib}_lib_path "${SCOTCH_${scotch_lib}_LIBRARY}" PATH) - # set cmake variables - list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") - list(APPEND SCOTCH_LIBRARY_DIRS "${${scotch_lib}_lib_path}") - else () - list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") - if (NOT SCOTCH_FIND_QUIETLY) - message(STATUS "Looking for scotch -- lib ${scotch_lib} not found") - endif() - endif () - - mark_as_advanced(SCOTCH_${scotch_lib}_LIBRARY) - -endforeach() -list(REMOVE_DUPLICATES SCOTCH_LIBRARY_DIRS) - -# check a function to validate the find -if(SCOTCH_LIBRARIES) - - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - set(REQUIRED_LIBS) - - # SCOTCH - if (SCOTCH_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") - endif() - if (SCOTCH_LIBRARY_DIRS) - set(REQUIRED_LIBDIRS "${SCOTCH_LIBRARY_DIRS}") - endif() - set(REQUIRED_LIBS "${SCOTCH_LIBRARIES}") - # THREADS - if(CMAKE_THREAD_LIBS_INIT) - list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") - endif() - set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") - find_library(Z_LIBRARY NAMES z) - mark_as_advanced(Z_LIBRARY) - if(Z_LIBRARY) - list(APPEND REQUIRED_LIBS "-lz") - endif() - set(M_LIBRARY "M_LIBRARY-NOTFOUND") - find_library(M_LIBRARY NAMES m) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - list(APPEND REQUIRED_LIBS "-lm") - endif() - set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") - find_library(RT_LIBRARY NAMES rt) - mark_as_advanced(RT_LIBRARY) - if(RT_LIBRARY) - list(APPEND REQUIRED_LIBS "-lrt") - endif() - - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - 
string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(SCOTCH_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(SCOTCH_graphInit SCOTCH_WORKS) - mark_as_advanced(SCOTCH_WORKS) - - if(SCOTCH_WORKS) - # save link with dependencies - set(SCOTCH_LIBRARIES "${REQUIRED_LIBS}") - else() - if(NOT SCOTCH_FIND_QUIETLY) - message(STATUS "Looking for SCOTCH : test of SCOTCH_graphInit with SCOTCH library fails") - message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") - message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") - message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") - endif() - endif() - set(CMAKE_REQUIRED_INCLUDES) - set(CMAKE_REQUIRED_FLAGS) - set(CMAKE_REQUIRED_LIBRARIES) -endif() - -if (SCOTCH_LIBRARIES) - list(GET SCOTCH_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "/lib(32|64)?$") - string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") - set(SCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of SCOTCH library" FORCE) - else() - set(SCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of SCOTCH library" FORCE) - endif() -endif() -mark_as_advanced(SCOTCH_DIR) -mark_as_advanced(SCOTCH_DIR_FOUND) - -# Check the size of SCOTCH_Num -# --------------------------------- -set(CMAKE_REQUIRED_INCLUDES ${SCOTCH_INCLUDE_DIRS}) - -include(CheckCSourceRuns) -#stdio.h and stdint.h should be included by scotch.h directly -set(SCOTCH_C_TEST_SCOTCH_Num_4 " -#include -#include -#include -int main(int argc, char **argv) { - if (sizeof(SCOTCH_Num) == 4) - return 0; - else - return 1; -} -") - -set(SCOTCH_C_TEST_SCOTCH_Num_8 " -#include -#include -#include -int main(int argc, char **argv) { - if (sizeof(SCOTCH_Num) == 8) - return 0; - else - return 1; -} -") -check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_4}" SCOTCH_Num_4) -if(NOT SCOTCH_Num_4) - check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_8}" SCOTCH_Num_8) - if(NOT SCOTCH_Num_8) - set(SCOTCH_INTSIZE -1) - else() - set(SCOTCH_INTSIZE 8) - endif() -else() - set(SCOTCH_INTSIZE 4) -endif() -set(CMAKE_REQUIRED_INCLUDES "") - -# check that SCOTCH has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SCOTCH DEFAULT_MSG - SCOTCH_LIBRARIES - SCOTCH_WORKS) -# -# TODO: Add possibility to check for specific functions in the library -# diff --git a/external/eigen-3.4.0/cmake/FindSPQR.cmake b/external/eigen-3.4.0/cmake/FindSPQR.cmake deleted file mode 100644 index d6fb2e1..0000000 --- a/external/eigen-3.4.0/cmake/FindSPQR.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# SPQR lib usually requires linking to a blas and lapack library. -# It is up to the user of this module to find a BLAS and link to it. - -# SPQR lib requires Cholmod, colamd and amd as well. 
-# FindCholmod.cmake can be used to find those packages before finding spqr - -if (SPQR_INCLUDES AND SPQR_LIBRARIES) - set(SPQR_FIND_QUIETLY TRUE) -endif () - -find_path(SPQR_INCLUDES - NAMES - SuiteSparseQR.hpp - PATHS - $ENV{SPQRDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - suitesparse - ufsparse -) - -find_library(SPQR_LIBRARIES spqr $ENV{SPQRDIR} ${LIB_INSTALL_DIR}) - -if(SPQR_LIBRARIES) - - find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS $ENV{SPQRDIR} ${LIB_INSTALL_DIR}) - if (SUITESPARSE_LIBRARY) - set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif() - - find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if(CHOLMOD_LIBRARY) - set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARY}) - endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SPQR DEFAULT_MSG SPQR_INCLUDES SPQR_LIBRARIES) - -mark_as_advanced(SPQR_INCLUDES SPQR_LIBRARIES) \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake b/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake deleted file mode 100644 index 1d1e5b3..0000000 --- a/external/eigen-3.4.0/cmake/FindStandardMathLibrary.cmake +++ /dev/null @@ -1,70 +0,0 @@ -# - Try to find how to link to the standard math library, if anything at all is needed to do. -# On most platforms this is automatic, but for example it's not automatic on QNX. -# -# Once done this will define -# -# STANDARD_MATH_LIBRARY_FOUND - we found how to successfully link to the standard math library -# STANDARD_MATH_LIBRARY - the name of the standard library that one has to link to. -# -- this will be left empty if it's automatic (most platforms). -# -- this will be set to "m" on platforms where one must explicitly -# pass the "-lm" linker flag. -# -# Copyright (c) 2010 Benoit Jacob -# 2020 Susi Lehtola -# Redistribution and use is allowed according to the terms of the 2-clause BSD license. - - -include(CheckCXXSourceCompiles) - -# a little test program for c++ math functions. -# notice the std:: is required on some platforms such as QNX -# notice the (void) is required if -Wall (-Wunused-value) is added to CMAKE_CXX_FLAG - -# We read in the arguments from standard input to avoid the compiler optimizing away the calls -set(find_standard_math_library_test_program -" -#include -int main(int argc, char **){ - return int(std::sin(double(argc)) + std::log(double(argc))); -}") - -# first try compiling/linking the test program without any linker flags - -set(CMAKE_REQUIRED_FLAGS "") -set(CMAKE_REQUIRED_LIBRARIES "") -CHECK_CXX_SOURCE_COMPILES( - "${find_standard_math_library_test_program}" - standard_math_library_linked_to_automatically -) - -if(standard_math_library_linked_to_automatically) - - # the test program linked successfully without any linker flag. - set(STANDARD_MATH_LIBRARY "") - set(STANDARD_MATH_LIBRARY_FOUND TRUE) - -else() - - # the test program did not link successfully without any linker flag. - # This is a very uncommon case that so far we only saw on QNX. The next try is the - # standard name 'm' for the standard math library. 
- - set(CMAKE_REQUIRED_LIBRARIES "m") - CHECK_CXX_SOURCE_COMPILES( - "${find_standard_math_library_test_program}" - standard_math_library_linked_to_as_m) - - if(standard_math_library_linked_to_as_m) - - # the test program linked successfully when linking to the 'm' library - set(STANDARD_MATH_LIBRARY "m") - set(STANDARD_MATH_LIBRARY_FOUND TRUE) - - else() - - # the test program still doesn't link successfully - set(STANDARD_MATH_LIBRARY_FOUND FALSE) - - endif() - -endif() diff --git a/external/eigen-3.4.0/cmake/FindSuperLU.cmake b/external/eigen-3.4.0/cmake/FindSuperLU.cmake deleted file mode 100644 index 4b779f5..0000000 --- a/external/eigen-3.4.0/cmake/FindSuperLU.cmake +++ /dev/null @@ -1,97 +0,0 @@ - -# Umfpack lib usually requires linking to a blas library. -# It is up to the user of this module to find a BLAS and link to it. - -if (SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) - set(SUPERLU_FIND_QUIETLY TRUE) -endif () - -find_path(SUPERLU_INCLUDES - NAMES - supermatrix.h - PATHS - $ENV{SUPERLUDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - superlu - SRC -) - -find_library(SUPERLU_LIBRARIES - NAMES "superlu_5.2.1" "superlu_5.2" "superlu_5.1.1" "superlu_5.1" "superlu_5.0" "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" - PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} - PATH_SUFFIXES lib) - -if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) - -include(CheckCXXSourceCompiles) -include(CMakePushCheckState) -cmake_push_check_state() - -set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${SUPERLU_INCLUDES}) - -# check whether struct mem_usage_t is globally defined -check_cxx_source_compiles(" -typedef int int_t; -#include -#include -int main() { - mem_usage_t mem; - return 0; -}" -SUPERLU_HAS_GLOBAL_MEM_USAGE_T) - - -check_cxx_source_compiles(" -typedef int int_t; -#include -#include -int main() { - return SLU_SINGLE; -}" -SUPERLU_HAS_CLEAN_ENUMS) - -check_cxx_source_compiles(" -typedef int int_t; -#include -#include -int main(void) -{ - GlobalLU_t glu; - return 0; -}" -SUPERLU_HAS_GLOBALLU_T) - -if(SUPERLU_HAS_GLOBALLU_T) - # at least 5.0 - set(SUPERLU_VERSION_VAR "5.0") -elseif(SUPERLU_HAS_CLEAN_ENUMS) - # at least 4.3 - set(SUPERLU_VERSION_VAR "4.3") -elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T) - # at least 4.0 - set(SUPERLU_VERSION_VAR "4.0") -else() - set(SUPERLU_VERSION_VAR "3.0") -endif() - -cmake_pop_check_state() - -if(SuperLU_FIND_VERSION) - if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION}) - set(SUPERLU_VERSION_OK FALSE) - else() - set(SUPERLU_VERSION_OK TRUE) - endif() -else() - set(SUPERLU_VERSION_OK TRUE) -endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SuperLU - REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK - VERSION_VAR SUPERLU_VERSION_VAR) - -mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES) diff --git a/external/eigen-3.4.0/cmake/FindTriSYCL.cmake b/external/eigen-3.4.0/cmake/FindTriSYCL.cmake deleted file mode 100644 index 8104239..0000000 --- a/external/eigen-3.4.0/cmake/FindTriSYCL.cmake +++ /dev/null @@ -1,173 +0,0 @@ -#.rst: -# FindTriSYCL -#--------------- -# -# TODO : insert Copyright and licence - -######################### -# FindTriSYCL.cmake -######################### -# -# Tools for finding and building with TriSYCL. -# -# User must define TRISYCL_INCLUDE_DIR pointing to the triSYCL -# include directory. 
-# -# Latest version of this file can be found at: -# https://github.com/triSYCL/triSYCL - -# Requite CMake version 3.5 or higher -cmake_minimum_required (VERSION 3.5) - -# Check that a supported host compiler can be found -if(CMAKE_COMPILER_IS_GNUCXX) - # Require at least gcc 5.4 - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4) - message(FATAL_ERROR - "host compiler - Not found! (gcc version must be at least 5.4)") - else() - message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") - endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - # Require at least clang 3.9 - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.9) - message(FATAL_ERROR - "host compiler - Not found! (clang version must be at least 3.9)") - else() - message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") - endif() -else() - message(WARNING - "host compiler - Not found! (triSYCL supports GCC and Clang)") -endif() - -#triSYCL options -option(TRISYCL_OPENMP "triSYCL multi-threading with OpenMP" ON) -option(TRISYCL_OPENCL "triSYCL OpenCL interoperability mode" OFF) -option(TRISYCL_NO_ASYNC "triSYCL use synchronous kernel execution" OFF) -option(TRISYCL_DEBUG "triSCYL use debug mode" OFF) -option(TRISYCL_DEBUG_STRUCTORS "triSYCL trace of object lifetimes" OFF) -option(TRISYCL_TRACE_KERNEL "triSYCL trace of kernel execution" OFF) - -mark_as_advanced(TRISYCL_OPENMP) -mark_as_advanced(TRISYCL_OPENCL) -mark_as_advanced(TRISYCL_NO_ASYNC) -mark_as_advanced(TRISYCL_DEBUG) -mark_as_advanced(TRISYCL_DEBUG_STRUCTORS) -mark_as_advanced(TRISYCL_TRACE_KERNEL) - -#triSYCL definitions -set(CL_SYCL_LANGUAGE_VERSION 220 CACHE STRING - "Host language version to be used by trisYCL (default is: 220)") -set(TRISYCL_CL_LANGUAGE_VERSION 220 CACHE STRING - "Device language version to be used by trisYCL (default is: 220)") -# triSYCL now requires c++17 -set(CMAKE_CXX_STANDARD 17) -set(CXX_STANDARD_REQUIRED ON) - - -# Find OpenCL package -include(CMakeFindDependencyMacro) -if(TRISYCL_OPENCL) - find_dependency(OpenCL REQUIRED) - if(UNIX) - set(BOOST_COMPUTE_INCPATH /usr/include/compute CACHE PATH - "Path to Boost.Compute headers (default is: /usr/include/compute)") - endif() -endif() - -# Find OpenMP package -if(TRISYCL_OPENMP) - find_dependency(OpenMP REQUIRED) -endif() - -# Find Boost -find_dependency(Boost 1.58 REQUIRED COMPONENTS chrono log) - -# If debug or trace we need boost log -if(TRISYCL_DEBUG OR TRISYCL_DEBUG_STRUCTORS OR TRISYCL_TRACE_KERNEL) - set(LOG_NEEDED ON) -else() - set(LOG_NEEDED OFF) -endif() - -find_dependency(Threads REQUIRED) - -# Find triSYCL directory -if (TRISYCL_INCLUDES AND TRISYCL_LIBRARIES) - set(TRISYCL_FIND_QUIETLY TRUE) -endif () - -find_path(TRISYCL_INCLUDE_DIR - NAMES sycl.hpp - PATHS $ENV{TRISYCLDIR} $ENV{TRISYCLDIR}/include ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES triSYCL -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(TriSYCL DEFAULT_MSG - TRISYCL_INCLUDE_DIR) - -if(NOT TRISYCL_INCLUDE_DIR) - message(FATAL_ERROR - "triSYCL include directory - Not found! 
(please set TRISYCL_INCLUDE_DIR") -else() - message(STATUS "triSYCL include directory - Found ${TRISYCL_INCLUDE_DIR}") -endif() - -include(CMakeParseArguments) -####################### -# add_sycl_to_target -####################### -function(add_sycl_to_target) - set(options) - set(one_value_args - TARGET - ) - set(multi_value_args - SOURCES - ) - cmake_parse_arguments(ADD_SYCL_ARGS - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN} - ) - - # Add include directories to the "#include <>" paths - target_include_directories (${ADD_SYCL_ARGS_TARGET} PUBLIC - ${TRISYCL_INCLUDE_DIR} - ${Boost_INCLUDE_DIRS} - $<$:${OpenCL_INCLUDE_DIRS}> - $<$:${BOOST_COMPUTE_INCPATH}>) - - # Link dependencies - target_link_libraries(${ADD_SYCL_ARGS_TARGET} - $<$:${OpenCL_LIBRARIES}> - Threads::Threads - $<$:Boost::log> - Boost::chrono) - - # Compile definitions - target_compile_definitions(${ADD_SYCL_ARGS_TARGET} PUBLIC - EIGEN_SYCL_TRISYCL - $<$:TRISYCL_NO_ASYNC> - $<$:TRISYCL_OPENCL> - $<$:TRISYCL_DEBUG> - $<$:TRISYCL_DEBUG_STRUCTORS> - $<$:TRISYCL_TRACE_KERNEL> - $<$:BOOST_LOG_DYN_LINK>) - - # C++ and OpenMP requirements - target_compile_options(${ADD_SYCL_ARGS_TARGET} PUBLIC - ${TRISYCL_COMPILE_OPTIONS} - $<$:${OpenMP_CXX_FLAGS}>) - - if(${TRISYCL_OPENMP} AND (NOT WIN32)) - # Does not support generator expressions - set_target_properties(${ADD_SYCL_ARGS_TARGET} - PROPERTIES - LINK_FLAGS ${OpenMP_CXX_FLAGS}) - endif() - -endfunction() diff --git a/external/eigen-3.4.0/cmake/FindUMFPACK.cmake b/external/eigen-3.4.0/cmake/FindUMFPACK.cmake deleted file mode 100644 index 91cf637..0000000 --- a/external/eigen-3.4.0/cmake/FindUMFPACK.cmake +++ /dev/null @@ -1,53 +0,0 @@ -# Umfpack lib usually requires linking to a blas library. -# It is up to the user of this module to find a BLAS and link to it. 
- -if (UMFPACK_INCLUDES AND UMFPACK_LIBRARIES) - set(UMFPACK_FIND_QUIETLY TRUE) -endif () - -find_path(UMFPACK_INCLUDES - NAMES - umfpack.h - PATHS - $ENV{UMFPACKDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - suitesparse - ufsparse -) - -find_library(UMFPACK_LIBRARIES umfpack PATHS $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - -if(UMFPACK_LIBRARIES) - - if(NOT UMFPACK_LIBDIR) - get_filename_component(UMFPACK_LIBDIR ${UMFPACK_LIBRARIES} PATH) - endif() - - find_library(COLAMD_LIBRARY colamd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if(COLAMD_LIBRARY) - set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${COLAMD_LIBRARY}) - endif () - - find_library(AMD_LIBRARY amd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if(AMD_LIBRARY) - set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${AMD_LIBRARY}) - endif () - - find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if(SUITESPARSE_LIBRARY) - set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif () - - find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if(CHOLMOD_LIBRARY) - set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${CHOLMOD_LIBRARY}) - endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(UMFPACK DEFAULT_MSG - UMFPACK_INCLUDES UMFPACK_LIBRARIES) - -mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY CHOLMOD_LIBRARY SUITESPARSE_LIBRARY) diff --git a/external/eigen-3.4.0/cmake/RegexUtils.cmake b/external/eigen-3.4.0/cmake/RegexUtils.cmake deleted file mode 100644 index f0a1524..0000000 --- a/external/eigen-3.4.0/cmake/RegexUtils.cmake +++ /dev/null @@ -1,19 +0,0 @@ -function(escape_string_as_regex _str_out _str_in) - string(REGEX REPLACE "\\\\" "\\\\\\\\" FILETEST2 "${_str_in}") - string(REGEX REPLACE "([.$+*?|-])" "\\\\\\1" FILETEST2 "${FILETEST2}") - string(REGEX REPLACE "\\^" "\\\\^" FILETEST2 "${FILETEST2}") - string(REGEX REPLACE "\\(" "\\\\(" FILETEST2 "${FILETEST2}") - string(REGEX REPLACE "\\)" "\\\\)" FILETEST2 "${FILETEST2}") - string(REGEX REPLACE "\\[" "\\\\[" FILETEST2 "${FILETEST2}") - string(REGEX REPLACE "\\]" "\\\\]" FILETEST2 "${FILETEST2}") - set(${_str_out} "${FILETEST2}" PARENT_SCOPE) -endfunction() - -function(test_escape_string_as_regex) - set(test1 "\\.^$-+*()[]?|") - escape_string_as_regex(test2 "${test1}") - set(testRef "\\\\\\.\\^\\$\\-\\+\\*\\(\\)\\[\\]\\?\\|") - if(NOT test2 STREQUAL testRef) - message("Error in the escape_string_for_regex function : \n ${test1} was escaped as ${test2}, should be ${testRef}") - endif() -endfunction() \ No newline at end of file diff --git a/external/eigen-3.4.0/cmake/UseEigen3.cmake b/external/eigen-3.4.0/cmake/UseEigen3.cmake deleted file mode 100644 index a38bac8..0000000 --- a/external/eigen-3.4.0/cmake/UseEigen3.cmake +++ /dev/null @@ -1,6 +0,0 @@ -# -*- cmake -*- -# -# UseEigen3.cmake - -add_definitions ( ${EIGEN3_DEFINITIONS} ) -include_directories ( ${EIGEN3_INCLUDE_DIRS} ) diff --git a/scripts/timing/test_timing.py b/scripts/timing/test_timing.py index 8fb8c51..d905895 100644 --- a/scripts/timing/test_timing.py +++ b/scripts/timing/test_timing.py @@ -79,31 +79,31 @@ def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): return cma -@timeit -def run_pycma(f: ioh.ProblemType, dim: int, n_evaluations: int, x0: np.ndarray): - options = pycma.CMAOptions() - options['CMA_active'] = False - # options['maxfevals'] = n_evaluations - options["verbose"] = -1 - 
options["CMA_diagonal"] = False - # pprint(options) - - cma = pycma.CMAEvolutionStrategy(x0, 2.0, options=options) - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - while f.state.evaluations < n_evaluations: - X, y = cma.ask_and_eval(f) - cma.tell(X, y) - # cma.disp() - assert f.state.evaluations >= n_evaluations +# @timeit +# def run_pycma(f: ioh.ProblemType, dim: int, n_evaluations: int, x0: np.ndarray): +# options = pycma.CMAOptions() +# options['CMA_active'] = False +# # options['maxfevals'] = n_evaluations +# options["verbose"] = -1 +# options["CMA_diagonal"] = False +# # pprint(options) + +# cma = pycma.CMAEvolutionStrategy(x0, 2.0, options=options) + +# with warnings.catch_warnings(): +# warnings.simplefilter("ignore") +# while f.state.evaluations < n_evaluations: +# X, y = cma.ask_and_eval(f) +# cma.tell(X, y) +# # cma.disp() +# assert f.state.evaluations >= n_evaluations if __name__ == "__main__": - n_iters = 1 + n_iters = 3 n_evals = 1_000 fid = 12 - dimensions = [50] + dimensions = [100] names, functions = zip( *[ (name, obj) diff --git a/setup.py b/setup.py index a5c0f9d..81f13f6 100644 --- a/setup.py +++ b/setup.py @@ -13,31 +13,36 @@ __version__ = "1.0.13" -ext = Pybind11Extension( - "modcma.c_maes.cmaescpp", - [x for x in glob("src/*cpp") if "main" not in x], - include_dirs=["include", "external"], - cxx_std=17, -) if platform.system() in ("Linux", "Darwin"): - os.environ["CC"] = "clang" - os.environ["CXX"] = "clang" - flags = [ + os.environ["CC"] = "g++" + os.environ["CXX"] = "g++" + c_flags = [ "-O3", - # "-fno-math-errno", "-msse2", "-mavx", "-mfma", "-mtune=native", - # "-march=native", "-ffast-math", "-flto", "-funroll-loops", "-ftree-vectorize" + "-fno-math-errno", + "-funroll-loops", + "-ftree-vectorize", + ] + l_flags = [ + "-flto", ] - if platform.system() == "Darwin": - flags.append("-mmacosx-version-min=10.15") - # else: - # flags.append("-march=native") - - ext._add_cflags(flags) - ext._add_ldflags(flags) + c_flags.append("-mmacosx-version-min=10.15") + else: + c_flags.extend([ + "-march=native", + "-mtune=native", + ]) else: - ext._add_cflags(["/O2"]) + c_flags = ["/O2"] +ext = Pybind11Extension( + "modcma.c_maes.cmaescpp", + [x for x in glob("src/*cpp") if "main" not in x], + include_dirs=["include", "external"], + cxx_std=17, + extra_link_args=l_flags, + extra_compile_args=c_flags +) setuptools.setup( name="modcma", From 424d44039459254da50d719aed7e5bdbca5e77eb Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Fri, 30 May 2025 15:23:48 +0200 Subject: [PATCH 10/74] what --- scripts/timing/test_timing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/timing/test_timing.py b/scripts/timing/test_timing.py index d905895..142b1ed 100644 --- a/scripts/timing/test_timing.py +++ b/scripts/timing/test_timing.py @@ -47,11 +47,12 @@ def run_fcmaes(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): # ret = retry.minimize(f, bounds.T, optimizer=optimizer.Cma_cpp(n_evaluations)) assert f.state.evaluations >= n_evaluations + print(f.state.current_best_internal.y) @timeit def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): - modcma.constants.calc_eigv = True + modcma.constants.calc_eigv = False modules = modcma.parameters.Modules() # modules.sample_transformation = modcma.options.SCALED_UNIFORM modules.matrix_adaptation = modcma.options.COVARIANCE @@ -74,7 +75,7 @@ def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): while cma.step(f): pass # cma.run(f) - 
print(cma.p.stats.t, cma.p.stats.n_updates) + print(cma.p.stats.t, cma.p.stats.n_updates, f.state.current_best_internal.y) assert f.state.evaluations >= n_evaluations return cma @@ -100,8 +101,8 @@ def run_modma(f: ioh.ProblemType, dim: int, n_evaluations, x0: np.ndarray): if __name__ == "__main__": - n_iters = 3 - n_evals = 1_000 + n_iters = 2 + n_evals = 2_000 fid = 12 dimensions = [100] names, functions = zip( From 60108af65ec2b240f32f551eeb4ffbe89294451f Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Fri, 30 May 2025 17:25:36 +0200 Subject: [PATCH 11/74] working cholesky cma-es --- CMakeLists.txt | 2 +- include/acmaes.hpp | 116 -- include/evaluator.h | 508 --------- include/matrix_adaptation.hpp | 47 +- include/modules.hpp | 3 +- include/pcg_extras.hpp | 667 ----------- include/pcg_random.hpp | 1958 --------------------------------- include/pcg_uint128.hpp | 1010 ----------------- include/settings.hpp | 2 +- include/to_string.hpp | 8 + src/acmaes.cpp | 670 ----------- src/main.cpp | 102 +- src/matrix_adaptation.cpp | 94 +- 13 files changed, 152 insertions(+), 5035 deletions(-) delete mode 100644 include/acmaes.hpp delete mode 100644 include/evaluator.h delete mode 100644 include/pcg_extras.hpp delete mode 100644 include/pcg_random.hpp delete mode 100644 include/pcg_uint128.hpp delete mode 100644 src/acmaes.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a383403..511794c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/src/*.cpp) list(FILTER SRC_FILES EXCLUDE REGEX ".*interface.cpp$") - + add_executable(main ${SRC_FILES}) target_include_directories(main PUBLIC diff --git a/include/acmaes.hpp b/include/acmaes.hpp deleted file mode 100644 index 2cdb15c..0000000 --- a/include/acmaes.hpp +++ /dev/null @@ -1,116 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include "evaluator.h" - - -namespace acmaes { - - class AcmaesOptimizer { - - public: - - AcmaesOptimizer(long runid_, Fitness* fitfun_, int popsize_, int mu_, - const vec& guess_, const vec& inputSigma_, int maxEvaluations_, - double accuracy_, double stopfitness_, double stopTolHistFun_, - int update_gap_, long seed); - - - ~AcmaesOptimizer(); - // param zmean weighted row matrix of the gaussian random numbers generating the current offspring - // param xold xmean matrix of the previous generation - // return hsig flag indicating a small correction - - bool updateEvolutionPaths(const vec& zmean, const vec& xold); - - // param hsig flag indicating a small correction - // param bestArx fitness-sorted matrix of the argument vectors producing the current offspring - // param arz unsorted matrix containing the gaussian random values of the current offspring - // param arindex indices indicating the fitness-order of the current offspring - // param xold xmean matrix of the previous generation - - double updateCovariance(bool hsig, const mat& bestArx, const mat& arz, - const ivec& arindex, const mat& xold); - - // Update B and diagD from C - // param negccov Negative covariance factor. 
- - void updateBD(double negccov); - mat ask_all(); - int tell_all(mat ys, mat xs); - int tell_all_asked(mat ys, mat xs); - mat getPopulation(); - vec ask(); - int tell(double y, const vec& x); - void updateCMA(); - int doOptimize(); - int do_optimize_delayed_update(int workers); - vec getBestX(); - double getBestValue(); - double getIterations(); - int getStop(); - Fitness* getFitfun(); - int getDim(); - int getPopsize(); - Fitness* getFitfunPar(); - mat popX; - - int n_updates; - - private: - long runid; - Fitness* fitfun; - vec guess; - double accuracy; - int popsize; // population size - vec inputSigma; - int dim; - int maxEvaluations; - double stopfitness; - double stopTolUpX; - double stopTolX; - double stopTolFun; - double stopTolHistFun; - int mu; // - vec weights; - double mueff; // - double sigma; - double cc; - double cs; - double damps; - double ccov1; - double ccovmu; - double chiN; - double ccov1Sep; - double ccovmuSep; - double lazy_update_gap = 0; - vec xmean; - vec pc; - vec ps; - double normps; - mat B; - mat BD; - mat diagD; - mat C; - vec diagC; - mat arz; - mat arx; - vec fitness; - int iterations = 0; - int last_update = 0; - vec fitnessHistory; - int historySize; - double bestValue; - vec bestX; - int stop; - int told = 0; - pcg64* rs; - bool computeArz; - }; -} diff --git a/include/evaluator.h b/include/evaluator.h deleted file mode 100644 index ded88ad..0000000 --- a/include/evaluator.h +++ /dev/null @@ -1,508 +0,0 @@ -/* - * evaluator.hpp - * - * Created on: Jul 12, 2021 - * Author: Dietmar Wolz - */ - -#ifndef EVALUATOR_HPP_ -#define EVALUATOR_HPP_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define EIGEN_VECTORIZE_SSE2 -#include "pcg_random.hpp" - -using Clock = std::chrono::steady_clock; -using std::chrono::time_point; -using std::chrono::duration_cast; -using std::chrono::milliseconds; - -template -class blocking_queue { - -private: - size_t _capacity; - std::queue _queue; - std::mutex _mutex; - std::condition_variable _not_full; - std::condition_variable _not_empty; - -public: - inline blocking_queue(size_t capacity) : - _capacity(capacity) { - } - - inline size_t size() { - std::unique_lock lock(_mutex); - return _queue.size(); - } - - //Inserts the specified element into this queue, - // waiting if necessary for space to become available. - inline void put(const T& elem) { - { - std::unique_lock lock(_mutex); - while (_queue.size() >= _capacity) - _not_full.wait(lock); - _queue.push(elem); - } - _not_empty.notify_one(); - } - - // Retrieves and removes the head of this queue, - // waiting if necessary until an element becomes available. 
- inline const T& take() { - std::unique_lock lock(_mutex); - while (_queue.size() == 0) - _not_empty.wait(lock); - T& front = _queue.front(); - _queue.pop(); - _not_full.notify_one(); - return front; - } -}; - -typedef Eigen::Matrix vec; -typedef Eigen::Matrix ivec; -typedef Eigen::Matrix mat; -typedef Eigen::Matrix imat; - -typedef bool (*callback_type)(int, const double*, double*); - -typedef void (*callback_parallel)(int, int, double*, double*); - -static bool noop_callback(int popsize, const double* x, double* y) { - return true; -} - -static void noop_callback_par(int popsize, int dim, double* x, double* y) { -} - -static std::uniform_real_distribution<> distr_01 = -std::uniform_real_distribution<>(0, 1); - -static std::normal_distribution<> gauss_01 = std::normal_distribution<>(0, 1); - -static Eigen::MatrixXd normal(int dx, int dy, pcg64& rs) { - return Eigen::MatrixXd::NullaryExpr(dx, dy, [&]() { - return gauss_01(rs); - }); -} - -static Eigen::MatrixXd normalVec(int dim, pcg64& rs) { - return Eigen::MatrixXd::NullaryExpr(dim, 1, [&]() { - return gauss_01(rs); - }); -} - -static Eigen::MatrixXd uniform(int dx, int dy, pcg64& rs) { - return Eigen::MatrixXd::NullaryExpr(dx, dy, [&]() { - return distr_01(rs); - }); -} - -static Eigen::MatrixXd uniformVec(int dim, pcg64& rs) { - return Eigen::MatrixXd::NullaryExpr(dim, 1, [&]() { - return distr_01(rs); - }); -} - -static double rand01(pcg64& rs) { - return distr_01(rs); -} - -static int randInt(pcg64& rs, int max) { - return (int)(max * distr_01(rs)); -} - -static double normreal(pcg64& rs, double mu, double sdev) { - return gauss_01(rs) * sdev + mu; -} - -static vec normalVec(const vec& mean, const vec& sdev, int dim, pcg64& rs) { - vec nv = normalVec(dim, rs); - return (nv.array() * sdev.array()).matrix() + mean; -} - -static vec zeros(int n) { - return Eigen::MatrixXd::Zero(n, 1); -} - -static mat zeros(int n, int m) { - return Eigen::MatrixXd::Zero(n, m); -} - -static vec constant(int n, double val) { - return Eigen::MatrixXd::Constant(n, 1, val); -} - -static mat constant(int n, int m, double val) { - return Eigen::MatrixXd::Constant(n, m, val); -} - -struct IndexVal { - int index; - double val; -}; - -static bool compareIndexVal(IndexVal i1, IndexVal i2) { - return (i1.val < i2.val); -} - -static ivec sort_index(const vec& x) { - int size = x.size(); - std::vector ivals(size); - for (int i = 0; i < size; i++) { - ivals[i].index = i; - ivals[i].val = x[i]; - } - std::sort(ivals.data(), ivals.data() + size, compareIndexVal); - return Eigen::MatrixXi::NullaryExpr(size, 1, [&ivals](int i) { - return ivals[i].index; - }); -} - -static int index_min(vec& v) { - double minv = DBL_MAX; - int mi = -1; - for (int i = 0; i < v.size(); i++) { - if (v[i] < minv) { - mi = i; - minv = v[i]; - } - } - return mi; -} - -// wrapper around the fitness function, scales according to boundaries - -class Fitness { - -public: - - Fitness(callback_type func, callback_parallel func_par_, int dim, int nobj, - const vec& lower, const vec& upper) : - _func(func), _func_par(func_par_), _dim(dim), _nobj(nobj), _lower( - lower), _upper(upper) { - if (_lower.size() > 0) { // bounds defined - _scale = _upper - _lower; - _typx = 0.5 * (_upper + _lower); - } - else { - _scale = constant(dim, 1.0); - _typx = zeros(dim); - } - _evaluationCounter = 0; - _normalize = false; - _terminate = false; - _dim = dim; - } - - bool terminate() { - return _terminate; - } - - vec eval(const vec& X) { - std::vector res(_nobj); - _terminate = _terminate || _func(_dim, X.data(), 
res.data()); - for (int i = 0; i < _nobj; i++) { - if (std::isnan(res[i]) || !std::isfinite(res[i])) - res[i] = 1E99; - } - _evaluationCounter++; - vec rvec = Eigen::Map(res.data(), _nobj); - return rvec; - } - - vec eval(const double* const p) { - std::vector res(_nobj); - _terminate = _terminate || _func(_dim, p, res.data()); - for (int i = 0; i < _nobj; i++) { - if (std::isnan(res[i]) || !std::isfinite(res[i])) - res[i] = 1E99; - } - _evaluationCounter++; - vec rvec = Eigen::Map(res.data(), _nobj); - return rvec; - } - - vec getClosestFeasible(const vec& X) const { - if (_lower.size() > 0) - return X.cwiseMin(_upper).cwiseMax(_lower); - return X; - } - - double getClosestFeasible_i(int i, double x_i) { - return _lower.size() == 0 ? x_i : std::min(_upper[i], std::max(_lower[i], x_i)); - } - - vec getClosestFeasibleNormed(const vec& X) const { - if (_lower.size() > 0) { - if (_normalize) - return X.cwiseMin(1.0).cwiseMax(-1.0); - else - return X.cwiseMin(_upper).cwiseMax(_lower); - } - return X; - } - - void setClosestFeasible(mat& X) const { - if (_lower.size() > 0) { - for (int i = 0; i < X.cols(); i++) { - if (_normalize) - X.col(i) = X.col(i).cwiseMin(1.0).cwiseMax(-1.0); - else - X.col(i) = X.col(i).cwiseMin(_upper).cwiseMax(_lower); - } - } - } - - vec norm(const vec& X) const { - if (_lower.size() == 0) - std::cout << "no bounds error" << std::endl; - return ((X - _lower).array() / _scale.array()).matrix(); - } - - double norm_i(int i, double x) const { - if (_lower.size() == 0) - std::cout << "no bounds error" << std::endl; - return std::max(0.0, std::min(1.0, (x - _lower[i]) / _scale[i])); // avoid Nan errors - } - - bool feasible(int i, double x) { - return _lower.size() == 0 || (x >= _lower[i] && x <= _upper[i]); - } - - bool hasBounds() { - return _lower.size() != 0; - } - - vec sample(pcg64& rs) { - if (_lower.size() == 0) - std::cout << "no bounds error" << std::endl; - vec rv = uniformVec(_dim, rs); - return (rv.array() * _scale.array()).matrix() + _lower; - } - - vec sample(pcg64& rs, vec& up, vec& lo) { - vec rv = uniformVec(_dim, rs); - return (rv.array() * (up - lo).array()).matrix() + lo; - } - - double sample_i(int i, pcg64& rs) { - if (_lower.size() == 0) - std::cout << "no bounds error" << std::endl; - return _lower[i] + _scale[i] * distr_01(rs); - } - - double sample_i(int i, pcg64& rs, vec& up, vec& lo) { - return lo[i] + (up[i] - lo[i]) * distr_01(rs); - } - - int evaluations() { - return _evaluationCounter; - } - - void resetEvaluations() { - _evaluationCounter = 0; - } - - void incrEvaluations() { - _evaluationCounter++; - } - - vec scale() { - return _scale; - } - - vec typx() { - return _typx; - } - - void setNormalize(bool normalize) { - _normalize = normalize; - } - - void setTerminate() { - _terminate = true; - } - - vec encode(const vec& X) const { - if (_normalize) - return 2 * (X - _typx).array() / _scale.array(); - else - return X; - } - - vec decode(const vec& X) const { - if (_normalize) - return 0.5 * (X.array() * _scale.array()).matrix() + _typx; - else - return X; - } - - void values(const mat& popX, vec& ys) { - int popsize = popX.cols(); - int n = popX.rows(); - std::vector pargs(popsize * n); - std::vector res(popsize); - for (int p = 0; p < popsize; p++) { - vec x = getClosestFeasible(decode(popX.col(p))); - for (int i = 0; i < n; i++) - pargs[p * n + i] = x(i); - } - _func_par(popsize, n, pargs.data(), res.data()); - for (int p = 0; p < popX.cols(); p++) - ys[p] = res[p]; - _evaluationCounter += popsize; - } - - vec violations(const mat& X, 
double penalty_coef) { - vec violations = zeros(X.cols()); - if (_lower.size() > 0) { - for (int i = 0; i < X.cols(); i++) { - vec x = decode(X.col(i)); - violations[i] = penalty_coef - * ((_lower - x).cwiseMax(0).sum() - + (x - _upper).cwiseMax(0).sum()); - } - } - return violations; - } - - void getMinValues(double* const p) const { - for (int i = 0; i < _lower.size(); i++) - p[i] = _lower[i]; - } - - void getMaxValues(double* const p) const { - for (int i = 0; i < _upper.size(); i++) - p[i] = _upper[i]; - } - -private: - callback_type _func; - callback_parallel _func_par; - int _dim; - int _nobj; - vec _lower; - vec _upper; - vec _scale; - vec _typx; - bool _normalize; - bool _terminate; - long _evaluationCounter; -}; - -struct vec_id { -public: - - vec_id(const vec& v, int id) : - _id(id), _v(v) { - } - - int _id; - vec _v; -}; - -class evaluator { -public: - - evaluator(Fitness* fit, int nobj, int workers) : - _fit(fit), _nobj(nobj), _workers(workers), _stop(false) { - _requests = new blocking_queue(2 * workers); - _evaled = new blocking_queue(2 * workers); - _t0 = Clock::now(); - if (_workers <= 0) - _workers = std::thread::hardware_concurrency(); - for (int thread_id = 0; thread_id < _workers; thread_id++) { - _jobs.push_back(evaluator_job(thread_id, this)); - } - } - - ~evaluator() { - join(); - delete _requests; - delete _evaled; - } - - void evaluate(vec& x, int id) { - _requests->put(new vec_id(x, id)); - } - - // needs to be deleted - vec_id* result() { - return _evaled->take(); - } - - void execute(int thread_id) { - while (!_stop) { - vec_id* vid = _requests->take(); - if (!_stop) { - try { - vec x = _fit->getClosestFeasible(_fit->decode(vid->_v)); - vid->_v = _fit->eval(x); - } - catch (std::exception& e) { - std::cout << e.what() << std::endl; - vid->_v = constant(_nobj, DBL_MAX); - } - _evaled->put(vid); - } - else - delete vid; - } - } - - void join() { - _stop = true; - vec x(0); - // to release all locks - for (auto& job : _jobs) { - _requests->put(new vec_id(x, 0)); - } - for (auto& job : _jobs) { - job.join(); - } - } - -private: - - class evaluator_job { - - public: - evaluator_job(int id, evaluator* exec) { - _thread = std::thread(&evaluator::execute, exec, id); - } - - void join() { - if (_thread.joinable()) - _thread.join(); - } - - private: - std::thread _thread; - }; - - Fitness* _fit; - int _nobj; - int _workers; - bool _stop; - blocking_queue* _requests; - blocking_queue* _evaled; - std::vector _jobs; - time_point _t0; -}; - -#endif /* EVALUATOR_HPP_ */ \ No newline at end of file diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index b5731c8..a7f38a9 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -32,14 +32,21 @@ namespace matrix_adaptation virtual bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) = 0; - virtual void restart(const parameters::Settings& settings) = 0; - virtual Vector compute_y(const Vector&) = 0; virtual Vector invert_x(const Vector&, Float sigma); virtual Vector invert_y(const Vector&) = 0; + virtual void restart(const parameters::Settings& settings) + { + m = settings.x0.value_or(Vector::Zero(settings.dim)); + m_old.setZero(); + dm.setZero(); + ps.setZero(); + dz.setZero(); + } + }; struct None final : Adaptation @@ -58,7 +65,6 @@ namespace matrix_adaptation const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) override; 
- void restart(const parameters::Settings& settings) override; Vector compute_y(const Vector&) override; @@ -150,8 +156,41 @@ namespace matrix_adaptation Vector compute_y(const Vector&) override; Vector invert_y(const Vector&) override; + }; + + struct CholeskyAdaptation final : Adaptation + { + Matrix A; + Vector pc; + + CholeskyAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) + : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), + A(Matrix::Identity(dim, dim)), + pc(Vector::Zero(dim)), + A_prime(Matrix::Zero(dim, dim)) + { + } + + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, + const std::shared_ptr& mutation, const parameters::Stats& stats, + size_t mu, size_t lambda) override; + + bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) override; + + void restart(const parameters::Settings& settings) override; + + Vector compute_y(const Vector&) override; + + Vector invert_y(const Vector&) override; + + Matrix rank_one_update(const Matrix& A, const Float beta, Vector a); + + private: + Matrix A_prime; }; + inline std::shared_ptr get(const parameters::Modules& m, const size_t dim, const Vector& x0, const Float expected_z) { using namespace parameters; @@ -165,6 +204,8 @@ namespace matrix_adaptation return std::make_shared(dim, x0, expected_z); case MatrixAdaptationType::ONEPLUSONE: return std::make_shared(dim, x0, expected_z); + case MatrixAdaptationType::CHOLESKY: + return std::make_shared(dim, x0, expected_z); default: case MatrixAdaptationType::COVARIANCE: return std::make_shared(dim, x0, expected_z); diff --git a/include/modules.hpp b/include/modules.hpp index a78724c..06ed83c 100644 --- a/include/modules.hpp +++ b/include/modules.hpp @@ -73,7 +73,8 @@ namespace parameters COVARIANCE, MATRIX, SEPERABLE, - ONEPLUSONE + ONEPLUSONE, + CHOLESKY, }; enum class CenterPlacement diff --git a/include/pcg_extras.hpp b/include/pcg_extras.hpp deleted file mode 100644 index 6467d15..0000000 --- a/include/pcg_extras.hpp +++ /dev/null @@ -1,667 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2017 Melissa O'Neill , - * and the PCG Project contributors. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. - * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. 
- */ - - /* - * This file provides support code that is useful for random-number generation - * but not specific to the PCG generation scheme, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - -#ifndef PCG_EXTRAS_HPP_INCLUDED -#define PCG_EXTRAS_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __GNUC__ -#include -#endif - - /* - * Abstractions for compiler-specific directives - */ - -#ifdef __GNUC__ -#define PCG_NOINLINE __attribute__((noinline)) -#else -#define PCG_NOINLINE -#endif - - /* - * Some members of the PCG library use 128-bit math. When compiling on 64-bit - * platforms, both GCC and Clang provide 128-bit integer types that are ideal - * for the job. - * - * On 32-bit platforms (or with other compilers), we fall back to a C++ - * class that provides 128-bit unsigned integers instead. It may seem - * like we're reinventing the wheel here, because libraries already exist - * that support large integers, but most existing libraries provide a very - * generic multiprecision code, but here we're operating at a fixed size. - * Also, most other libraries are fairly heavyweight. So we use a direct - * implementation. Sadly, it's much slower than hand-coded assembly or - * direct CPU support. - * - */ -#if __SIZEOF_INT128__ && !PCG_FORCE_EMULATED_128BIT_MATH -namespace pcg_extras { - typedef __uint128_t pcg128_t; -} -#define PCG_128BIT_CONSTANT(high,low) \ - ((pcg_extras::pcg128_t(high) << 64) + low) -#else -#include "pcg_uint128.hpp" -namespace pcg_extras { - typedef pcg_extras::uint_x4 pcg128_t; -} -#define PCG_128BIT_CONSTANT(high,low) \ - pcg_extras::pcg128_t(high,low) -#define PCG_EMULATED_128BIT_MATH 1 -#endif - - -namespace pcg_extras { - - /* - * We often need to represent a "number of bits". When used normally, these - * numbers are never greater than 128, so an unsigned char is plenty. - * If you're using a nonstandard generator of a larger size, you can set - * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers - * might produce faster code if you set it to an unsigned int.) - */ - -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif - - /* - * C++ requires us to be able to serialize RNG state by printing or reading - * it from a stream. Because we use 128-bit ints, we also need to be able - * ot print them, so here is code to do so. - * - * This code provides enough functionality to print 128-bit ints in decimal - * and zero-padded in hex. It's not a full-featured implementation. 
- */ - - template - std::basic_ostream& - operator<<(std::basic_ostream& out, pcg128_t value) - { - auto desired_base = out.flags() & out.basefield; - bool want_hex = desired_base == out.hex; - - if (want_hex) { - uint64_t highpart = uint64_t(value >> 64); - uint64_t lowpart = uint64_t(value); - auto desired_width = out.width(); - if (desired_width > 16) { - out.width(desired_width - 16); - } - if (highpart != 0 || desired_width > 16) - out << highpart; - CharT oldfill = '\0'; - if (highpart != 0) { - out.width(16); - oldfill = out.fill('0'); - } - auto oldflags = out.setf(decltype(desired_base){}, out.showbase); - out << lowpart; - out.setf(oldflags); - if (highpart != 0) { - out.fill(oldfill); - } - return out; - } - constexpr size_t MAX_CHARS_128BIT = 40; - - char buffer[MAX_CHARS_128BIT]; - char* pos = buffer + sizeof(buffer); - *(--pos) = '\0'; - constexpr auto BASE = pcg128_t(10ULL); - do { - auto div = value / BASE; - auto mod = uint32_t(value - (div * BASE)); - *(--pos) = '0' + char(mod); - value = div; - } while (value != pcg128_t(0ULL)); - return out << pos; - } - - template - std::basic_istream& - operator>>(std::basic_istream& in, pcg128_t& value) - { - typename std::basic_istream::sentry s(in); - - if (!s) - return in; - - constexpr auto BASE = pcg128_t(10ULL); - pcg128_t current(0ULL); - bool did_nothing = true; - bool overflow = false; - for (;;) { - CharT wide_ch = in.get(); - if (!in.good()) { - in.clear(std::ios::eofbit); - break; - } - auto ch = in.narrow(wide_ch, '\0'); - if (ch < '0' || ch > '9') { - in.unget(); - break; - } - did_nothing = false; - pcg128_t digit(uint32_t(ch - '0')); - pcg128_t timesbase = current * BASE; - overflow = overflow || timesbase < current; - current = timesbase + digit; - overflow = overflow || current < digit; - } - - if (did_nothing || overflow) { - in.setstate(std::ios::failbit); - if (overflow) - current = ~pcg128_t(0ULL); - } - - value = current; - - return in; - } - - /* - * Likewise, if people use tiny rngs, we'll be serializing uint8_t. - * If we just used the provided IO operators, they'd read/write chars, - * not ints, so we need to define our own. We *can* redefine this operator - * here because we're in our own namespace. - */ - - template - std::basic_ostream& - operator<<(std::basic_ostream& out, uint8_t value) - { - return out << uint32_t(value); - } - - template - std::basic_istream& - operator>>(std::basic_istream& in, uint8_t& target) - { - uint32_t value = 0xdecea5edU; - in >> value; - if (!in && value == 0xdecea5edU) - return in; - if (value > uint8_t(~0)) { - in.setstate(std::ios::failbit); - value = ~0U; - } - target = uint8_t(value); - return in; - } - - /* Unfortunately, the above functions don't get found in preference to the - * built in ones, so we create some more specific overloads that will. - * Ugh. - */ - - inline std::ostream& operator<<(std::ostream& out, uint8_t value) - { - return pcg_extras::operator<< (out, value); - } - - inline std::istream& operator>>(std::istream& in, uint8_t& value) - { - return pcg_extras::operator>> (in, value); - } - - - - /* - * Useful bitwise operations. - */ - - /* - * XorShifts are invertable, but they are someting of a pain to invert. - * This function backs them out. It's used by the whacky "inside out" - * generator defined later. 
- */ - - template - inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) - { - if (2 * shift >= bits) { - return x ^ (x >> shift); - } - itype lowmask1 = (itype(1U) << (bits - shift * 2)) - 1; - itype highmask1 = ~lowmask1; - itype top1 = x; - itype bottom1 = x & lowmask1; - top1 ^= top1 >> shift; - top1 &= highmask1; - x = top1 | bottom1; - itype lowmask2 = (itype(1U) << (bits - shift)) - 1; - itype bottom2 = x & lowmask2; - bottom2 = unxorshift(bottom2, bits - shift, shift); - bottom2 &= lowmask1; - return top1 | bottom2; - } - - /* - * Rotate left and right. - * - * In ideal world, compilers would spot idiomatic rotate code and convert it - * to a rotate instruction. Of course, opinions vary on what the correct - * idiom is and how to spot it. For clang, sometimes it generates better - * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. - */ - - template - inline itype rotl(itype value, bitcount_t rot) - { - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? (value << rot) | (value >> (bits - rot)) : value; -#else - return (value << rot) | (value >> ((-rot) & mask)); -#endif - } - - template - inline itype rotr(itype value, bitcount_t rot) - { - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? (value >> rot) | (value << (bits - rot)) : value; -#else - return (value >> rot) | (value << ((-rot) & mask)); -#endif - } - - /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes - * to properly recognizing idiomatic rotate code, so for we also provide - * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. - * (I hope that these compilers get better so that this code can die.) - * - * These overloads will be preferred over the general template code above. - */ -#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) - - inline uint8_t rotr(uint8_t value, bitcount_t rot) - { - asm("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; - } - - inline uint16_t rotr(uint16_t value, bitcount_t rot) - { - asm("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; - } - - inline uint32_t rotr(uint32_t value, bitcount_t rot) - { - asm("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; - } - -#if __x86_64__ - inline uint64_t rotr(uint64_t value, bitcount_t rot) - { - asm("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; - } -#endif // __x86_64__ - -#elif defined(_MSC_VER) - // Use MSVC++ bit rotation intrinsics - -#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16) - - inline uint8_t rotr(uint8_t value, bitcount_t rot) - { - return _rotr8(value, rot); - } - - inline uint16_t rotr(uint16_t value, bitcount_t rot) - { - return _rotr16(value, rot); - } - - inline uint32_t rotr(uint32_t value, bitcount_t rot) - { - return _rotr(value, rot); - } - - inline uint64_t rotr(uint64_t value, bitcount_t rot) - { - return _rotr64(value, rot); - } - -#endif // PCG_USE_INLINE_ASM - - - /* - * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of - * 32-bit integers with seed data, but sometimes we want to produce - * larger or smaller integers. - * - * The following code handles this annoyance. - * - * uneven_copy will copy an array of 32-bit ints to an array of larger or - * smaller ints (actually, the code is general it only needing forward - * iterators). 
The copy is identical to the one that would be performed if - * we just did memcpy on a standard little-endian machine, but works - * regardless of the endian of the machine (or the weirdness of the ints - * involved). - * - * generate_to initializes an array of integers using a SeedSeq - * object. It is given the size as a static constant at compile time and - * tries to avoid memory allocation. If we're filling in 32-bit constants - * we just do it directly. If we need a separate buffer and it's small, - * we allocate it on the stack. Otherwise, we fall back to heap allocation. - * Ugh. - * - * generate_one produces a single value of some integral type using a - * SeedSeq object. - */ - - /* uneven_copy helper, case where destination ints are less than 32 bit. */ - - template - SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::true_type) - { - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr bitcount_t SRC_SIZE = sizeof(src_t); - constexpr bitcount_t DEST_SIZE = sizeof(dest_t); - constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; - constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; - - size_t count = 0; - src_t value = 0; - - while (dest_first != dest_last) { - if ((count++ % SCALE) == 0) - value = *src_first++; // Get more bits - else - value >>= DEST_BITS; // Move down bits - - *dest_first++ = dest_t(value); // Truncates, ignores high bits. - } - return src_first; - } - - /* uneven_copy helper, case where destination ints are more than 32 bit. */ - - template - SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::false_type) - { - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr auto SRC_SIZE = sizeof(src_t); - constexpr auto SRC_BITS = SRC_SIZE * 8; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto SCALE = (DEST_SIZE + SRC_SIZE - 1) / SRC_SIZE; - - while (dest_first != dest_last) { - dest_t value(0UL); - unsigned int shift = 0; - - for (size_t i = 0; i < SCALE; ++i) { - value |= dest_t(*src_first++) << shift; - shift += SRC_BITS; - } - - *dest_first++ = value; - } - return src_first; - } - - /* uneven_copy, call the right code for larger vs. smaller */ - - template - inline SrcIter uneven_copy(SrcIter src_first, - DestIter dest_first, DestIter dest_last) - { - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); - - return uneven_copy_impl(src_first, dest_first, dest_last, - std::integral_constant{}); - } - - /* generate_to, fill in a fixed-size array of integral type using a SeedSeq - * (actually works for any random-access iterator) - */ - - template - inline void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::true_type) - { - generator.generate(dest, dest + size); - } - - template - void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::false_type) - { - typedef typename std::iterator_traits::value_type dest_t; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto GEN_SIZE = sizeof(uint32_t); - - constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; - constexpr size_t FROM_ELEMS = - GEN_IS_SMALLER - ? 
size * ((DEST_SIZE + GEN_SIZE - 1) / GEN_SIZE) - : (size + (GEN_SIZE / DEST_SIZE) - 1) - / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); - // this odd code ^^^^^^^^^^^^^^^^^ is work-around for - // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 - - if (FROM_ELEMS <= 1024) { - uint32_t buffer[FROM_ELEMS]; - generator.generate(buffer, buffer + FROM_ELEMS); - uneven_copy(buffer, dest, dest + size); - } - else { - uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); - generator.generate(buffer, buffer + FROM_ELEMS); - uneven_copy(buffer, dest, dest + size); - free(static_cast(buffer)); - } - } - - template - inline void generate_to(SeedSeq&& generator, DestIter dest) - { - typedef typename std::iterator_traits::value_type dest_t; - constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); - - generate_to_impl(std::forward(generator), dest, - std::integral_constant{}); - } - - /* generate_one, produce a value of integral type using a SeedSeq - * (optionally, we can have it produce more than one and pick which one - * we want) - */ - - template - inline UInt generate_one(SeedSeq&& generator) - { - UInt result[N]; - generate_to(std::forward(generator), result); - return result[i]; - } - - template - auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) - -> typename RngType::result_type - { - typedef typename RngType::result_type rtype; - rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) - % upper_bound; - for (;;) { - rtype r = rng() - RngType::min(); - if (r >= threshold) - return r % upper_bound; - } - } - - template - void shuffle(Iter from, Iter to, RandType&& rng) - { - typedef typename std::iterator_traits::difference_type delta_t; - typedef typename std::remove_reference::type::result_type result_t; - auto count = to - from; - while (count > 1) { - delta_t chosen = delta_t(bounded_rand(rng, result_t(count))); - --count; - --to; - using std::swap; - swap(*(from + chosen), *to); - } - } - - /* - * Although std::seed_seq is useful, it isn't everything. Often we want to - * initialize a random-number generator some other way, such as from a random - * device. - * - * Technically, it does not meet the requirements of a SeedSequence because - * it lacks some of the rarely-used member functions (some of which would - * be impossible to provide). However the C++ standard is quite specific - * that actual engines only called the generate method, so it ought not to be - * a problem in practice. - */ - - template - class seed_seq_from { - private: - RngType rng_; - - typedef uint_least32_t result_type; - - public: - template - seed_seq_from(Args&&... args) : - rng_(std::forward(args)...) - { - // Nothing (else) to do... - } - - template - void generate(Iter start, Iter finish) - { - for (auto i = start; i != finish; ++i) - *i = result_type(rng_()); - } - - constexpr size_t size() const - { - return (sizeof(typename RngType::result_type) > sizeof(result_type) - && RngType::max() > ~size_t(0UL)) - ? ~size_t(0UL) - : size_t(RngType::max()); - } - }; - - /* - * Sometimes you might want a distinct seed based on when the program - * was compiled. That way, a particular instance of the program will - * behave the same way, but when recompiled it'll produce a different - * value. - */ - - template - struct static_arbitrary_seed { - private: - static constexpr IntType fnv(IntType hash, const char* pos) { - return *pos == '\0' - ? 
hash - : fnv((hash * IntType(16777619U)) ^ *pos, (pos + 1)); - } - - public: - static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), - __DATE__ __TIME__ __FILE__); - }; - - // Sometimes, when debugging or testing, it's handy to be able print the name - // of a (in human-readable form). This code allows the idiom: - // - // cout << printable_typename() - // - // to print out my_foo_type_t (or its concrete type if it is a synonym) - -#if __cpp_rtti || __GXX_RTTI - - template - struct printable_typename {}; - - template - std::ostream& operator<<(std::ostream& out, printable_typename) { - const char* implementation_typename = typeid(T).name(); -#ifdef __GNUC__ - int status; - char* pretty_name = - abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status); - if (status == 0) - out << pretty_name; - free(static_cast(pretty_name)); - if (status == 0) - return out; -#endif - out << implementation_typename; - return out; - } - -#endif // __cpp_rtti || __GXX_RTTI - -} // namespace pcg_extras - -#endif // PCG_EXTRAS_HPP_INCLUDED \ No newline at end of file diff --git a/include/pcg_random.hpp b/include/pcg_random.hpp deleted file mode 100644 index 944458a..0000000 --- a/include/pcg_random.hpp +++ /dev/null @@ -1,1958 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2022 Melissa O'Neill , - * and the PCG Project contributors. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. - * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. - */ - - /* - * This code provides the reference implementation of the PCG family of - * random number generators. 
The code is complex because it implements - * - * - several members of the PCG family, specifically members corresponding - * to the output functions: - * - XSH RR (good for 64-bit state, 32-bit output) - * - XSH RS (good for 64-bit state, 32-bit output) - * - XSL RR (good for 128-bit state, 64-bit output) - * - RXS M XS (statistically most powerful generator) - * - XSL RR RR (good for 128-bit state, 128-bit output) - * - and RXS, RXS M, XSH, XSL (mostly for testing) - * - at potentially *arbitrary* bit sizes - * - with four different techniques for random streams (MCG, one-stream - * LCG, settable-stream LCG, unique-stream LCG) - * - and the extended generation schemes allowing arbitrary periods - * - with all features of C++11 random number generation (and more), - * some of which are somewhat painful, including - * - initializing with a SeedSequence which writes 32-bit values - * to memory, even though the state of the generator may not - * use 32-bit values (it might use smaller or larger integers) - * - I/O for RNGs and a prescribed format, which needs to handle - * the issue that 8-bit and 128-bit integers don't have working - * I/O routines (e.g., normally 8-bit = char, not integer) - * - equality and inequality for RNGs - * - and a number of convenience typedefs to mask all the complexity - * - * The code employees a fairly heavy level of abstraction, and has to deal - * with various C++ minutia. If you're looking to learn about how the PCG - * scheme works, you're probably best of starting with one of the other - * codebases (see www.pcg-random.org). But if you're curious about the - * constants for the various output functions used in those other, simpler, - * codebases, this code shows how they are calculated. - * - * On the positive side, at least there are convenience typedefs so that you - * can say - * - * pcg32 myRNG; - * - * rather than: - * - * pcg_detail::engine< - * uint32_t, // Output Type - * uint64_t, // State Type - * pcg_detail::xsh_rr_mixin, true, // Output Func - * pcg_detail::specific_stream, // Stream Kind - * pcg_detail::default_multiplier // LCG Mult - * > myRNG; - * - */ - -#ifndef PCG_RAND_HPP_INCLUDED -#define PCG_RAND_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef _MSC_VER -#pragma warning(disable:4146) -#endif - -#ifdef _MSC_VER -#define PCG_ALWAYS_INLINE __forceinline -#elif __GNUC__ -#define PCG_ALWAYS_INLINE __attribute__((always_inline)) -#else -#define PCG_ALWAYS_INLINE inline -#endif - - /* - * The pcg_extras namespace contains some support code that is likely to - * be useful for a variety of RNGs, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - -#include "pcg_extras.hpp" - -namespace pcg_detail { - - using namespace pcg_extras; - - /* - * The LCG generators need some constants to function. This code lets you - * look up the constant by *type*. For example - * - * default_multiplier::multiplier() - * - * gives you the default multiplier for 32-bit integers. We use the name - * of the constant and not a generic word like value to allow these classes - * to be used as mixins. 
- */ - - template - struct default_multiplier { - // Not defined for an arbitrary type - }; - - template - struct default_increment { - // Not defined for an arbitrary type - }; - -#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ - template <> \ - struct what ## _ ## kind { \ - static constexpr type kind() { \ - return constant; \ - } \ - }; - - PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) - PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) - - PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) - PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) - - PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) - PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) - - PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) - PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) - - PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, - PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL)) - PCG_DEFINE_CONSTANT(pcg128_t, default, increment, - PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL)) - - /* Alternative (cheaper) multipliers for 128-bit */ - - template - struct cheap_multiplier : public default_multiplier { - // For most types just use the default. - }; - - template <> - struct cheap_multiplier { - static constexpr uint64_t multiplier() { - return 0xda942042e4dd58b5ULL; - } - }; - - - /* - * Each PCG generator is available in four variants, based on how it applies - * the additive constant for its underlying LCG; the variations are: - * - * single stream - all instances use the same fixed constant, thus - * the RNG always somewhere in same sequence - * mcg - adds zero, resulting in a single stream and reduced - * period - * specific stream - the constant can be changed at any time, selecting - * a different random sequence - * unique stream - the constant is based on the memory address of the - * object, thus every RNG has its own unique sequence - * - * This variation is provided though mixin classes which define a function - * value called increment() that returns the necessary additive constant. - */ - - - - /* - * unique stream - */ - - - template - class unique_stream { - protected: - static constexpr bool is_mcg = false; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } - - public: - typedef itype state_type; - - constexpr itype increment() const { - return itype(reinterpret_cast(this) | 1); - } - - constexpr itype stream() const - { - return increment() >> 1; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) - : sizeof(size_t)) * 8 - 1u; - } - - protected: - constexpr unique_stream() = default; - }; - - - /* - * no stream (mcg) - */ - - template - class no_stream { - protected: - static constexpr bool is_mcg = true; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) 
- { - abort(); - } - - public: - typedef itype state_type; - - static constexpr itype increment() { - return 0; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return 0u; - } - - protected: - constexpr no_stream() = default; - }; - - - /* - * single stream/sequence (oneseq) - */ - - template - class oneseq_stream : public default_increment { - protected: - static constexpr bool is_mcg = false; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } - - public: - typedef itype state_type; - - static constexpr itype stream() - { - return default_increment::increment() >> 1; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return 0u; - } - - protected: - constexpr oneseq_stream() = default; - }; - - - /* - * specific stream - */ - - template - class specific_stream { - protected: - static constexpr bool is_mcg = false; - - itype inc_ = default_increment::increment(); - - public: - typedef itype state_type; - typedef itype stream_state; - - constexpr itype increment() const { - return inc_; - } - - itype stream() - { - return inc_ >> 1; - } - - void set_stream(itype specific_seq) - { - inc_ = (specific_seq << 1) | 1; - } - - static constexpr bool can_specify_stream = true; - - static constexpr size_t streams_pow2() - { - return (sizeof(itype) * 8) - 1u; - } - - protected: - specific_stream() = default; - - specific_stream(itype specific_seq) - : inc_(itype(specific_seq << 1) | itype(1U)) - { - // Nothing (else) to do. - } - }; - - - /* - * This is where it all comes together. This function joins together three - * mixin classes which define - * - the LCG additive constant (the stream) - * - the LCG multiplier - * - the output function - * in addition, we specify the type of the LCG state, and the result type, - * and whether to use the pre-advance version of the state for the output - * (increasing instruction-level parallelism) or the post-advance version - * (reducing register pressure). - * - * Given the high level of parameterization, the code has to use some - * template-metaprogramming tricks to handle some of the subtle variations - * involved. - */ - - template , - typename multiplier_mixin = default_multiplier > - class engine : protected output_mixin, - public stream_mixin, - protected multiplier_mixin { - protected: - itype state_; - - struct can_specify_stream_tag {}; - struct no_specifiable_stream_tag {}; - - using stream_mixin::increment; - using multiplier_mixin::multiplier; - - public: - typedef xtype result_type; - typedef itype state_type; - - static constexpr size_t period_pow2() - { - return sizeof(state_type) * 8 - 2 * stream_mixin::is_mcg; - } - - // It would be nice to use std::numeric_limits for these, but - // we can't be sure that it'd be defined for the 128-bit types. 
- - static constexpr result_type min() - { - return result_type(0UL); - } - - static constexpr result_type max() - { - return result_type(~result_type(0UL)); - } - - protected: - itype bump(itype state) - { - return state * multiplier() + increment(); - } - - itype base_generate() - { - return state_ = bump(state_); - } - - itype base_generate0() - { - itype old_state = state_; - state_ = bump(state_); - return old_state; - } - - public: - result_type operator()() - { - if (output_previous) - return this->output(base_generate0()); - else - return this->output(base_generate()); - } - - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - - protected: - static itype advance(itype state, itype delta, - itype cur_mult, itype cur_plus); - - static itype distance(itype cur_state, itype newstate, itype cur_mult, - itype cur_plus, itype mask = ~itype(0U)); - - itype distance(itype newstate, itype mask = itype(~itype(0U))) const - { - return distance(state_, newstate, multiplier(), increment(), mask); - } - - public: - void advance(itype delta) - { - state_ = advance(state_, delta, this->multiplier(), this->increment()); - } - - void backstep(itype delta) - { - advance(-delta); - } - - void discard(itype delta) - { - advance(delta); - } - - bool wrapped() - { - if (stream_mixin::is_mcg) { - // For MCGs, the low order two bits never change. In this - // implementation, we keep them fixed at 3 to make this test - // easier. - return state_ == 3; - } - else { - return state_ == 0; - } - } - - engine(itype state = itype(0xcafef00dd15ea5e5ULL)) - : state_(this->is_mcg ? state | state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - engine(itype state, typename sm::stream_state stream_seed) - : stream_mixin(stream_seed), - state_(this->is_mcg ? state | state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - !stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - no_specifiable_stream_tag>::type = {}) - : engine(generate_one(std::forward(seedSeq))) - { - // Nothing else to do. - } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - can_specify_stream_tag>::type = {}) - { - itype seeddata[2]; - generate_to<2>(std::forward(seedSeq), seeddata); - seed(seeddata[1], seeddata[0]); - } - - - template - void seed(Args&&... 
args) - { - new (this) engine(std::forward(args)...); - } - - template - friend bool operator==(const engine&, - const engine&); - - template - friend itype1 operator-(const engine&, - const engine&); - - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const engine&); - - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - engine& rng); - }; - - template - std::basic_ostream& - operator<<(std::basic_ostream& out, - const engine& rng) - { - using pcg_extras::operator<<; - - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); - - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; - - out.flags(orig_flags); - out.fill(orig_fill); - return out; - } - - - template - std::basic_istream& - operator>>(std::basic_istream& in, - engine& rng) - { - using pcg_extras::operator>>; - - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); - - itype multiplier, increment, state; - in >> multiplier >> increment >> state; - - if (!in.fail()) { - bool good = true; - if (multiplier != rng.multiplier()) { - good = false; - } - else if (rng.can_specify_stream) { - rng.set_stream(increment >> 1); - } - else if (increment != rng.increment()) { - good = false; - } - if (good) { - rng.state_ = state; - } - else { - in.clear(std::ios::failbit); - } - } - - in.flags(orig_flags); - return in; - } - - - template - itype engine::advance( - itype state, itype delta, itype cur_mult, itype cur_plus) - { - // The method used here is based on Brown, "Random Number Generation - // with Arbitrary Stride,", Transactions of the American Nuclear - // Society (Nov. 1994). The algorithm is very similar to fast - // exponentiation. - // - // Even though delta is an unsigned integer, we can pass a - // signed integer to go backwards, it just goes "the long way round". - - constexpr itype ZERO = 0u; // itype may be a non-trivial types, so - constexpr itype ONE = 1u; // we define some ugly constants. - itype acc_mult = 1; - itype acc_plus = 0; - while (delta > ZERO) { - if (delta & ONE) { - acc_mult *= cur_mult; - acc_plus = acc_plus * cur_mult + cur_plus; - } - cur_plus = (cur_mult + ONE) * cur_plus; - cur_mult *= cur_mult; - delta >>= 1; - } - return acc_mult * state + acc_plus; - } - - template - itype engine::distance( - itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) - { - constexpr itype ONE = 1u; // itype could be weird, so use constant - bool is_mcg = cur_plus == itype(0); - itype the_bit = is_mcg ? itype(4u) : itype(1u); - itype distance = 0u; - while ((cur_state & mask) != (newstate & mask)) { - if ((cur_state & the_bit) != (newstate & the_bit)) { - cur_state = cur_state * cur_mult + cur_plus; - distance |= the_bit; - } - assert((cur_state & the_bit) == (newstate & the_bit)); - the_bit <<= 1; - cur_plus = (cur_mult + ONE) * cur_plus; - cur_mult *= cur_mult; - } - return is_mcg ? 
distance >> 2 : distance; - } - - template - itype operator-(const engine& lhs, - const engine& rhs) - { - static_assert( - std::is_same::value && - std::is_same::value, - "Incomparable generators"); - if (lhs.increment() == rhs.increment()) { - return rhs.distance(lhs.state_); - } - else { - constexpr itype ONE = 1u; - itype lhs_diff = lhs.increment() + (lhs.multiplier() - ONE) * lhs.state_; - itype rhs_diff = rhs.increment() + (rhs.multiplier() - ONE) * rhs.state_; - if ((lhs_diff & itype(3u)) != (rhs_diff & itype(3u))) { - rhs_diff = -rhs_diff; - } - return rhs.distance(rhs_diff, lhs_diff, rhs.multiplier(), itype(0u)); - } - } - - - template - bool operator==(const engine& lhs, - const engine& rhs) - { - return (lhs.multiplier() == rhs.multiplier()) - && (lhs.increment() == rhs.increment()) - && (lhs.state_ == rhs.state_); - } - - template - inline bool operator!=(const engine& lhs, - const engine& rhs) - { - return !operator==(lhs, rhs); - } - - - template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> - using oneseq_base = engine, output_previous, - oneseq_stream, - multiplier_mixin >; - - template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> - using unique_base = engine, output_previous, - unique_stream, - multiplier_mixin >; - - template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> - using setseq_base = engine, output_previous, - specific_stream, - multiplier_mixin >; - - template class output_mixin, - bool output_previous = (sizeof(itype) <= 8), - template class multiplier_mixin = default_multiplier> - using mcg_base = engine, output_previous, - no_stream, - multiplier_mixin >; - - /* - * OUTPUT FUNCTIONS. - * - * These are the core of the PCG generation scheme. They specify how to - * turn the base LCG's internal state into the output value of the final - * generator. - * - * They're implemented as mixin classes. - * - * All of the classes have code that is written to allow it to be applied - * at *arbitrary* bit sizes, although in practice they'll only be used at - * standard sizes supported by C++. - */ - - /* - * XSH RS -- high xorshift, followed by a random shift - * - * Fast. A good performer. - */ - - template - struct xsh_rs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t opbits = - sparebits - 5 >= 64 ? 5 - : sparebits - 4 >= 32 ? 4 - : sparebits - 3 >= 16 ? 3 - : sparebits - 2 >= 4 ? 2 - : sparebits - 1 >= 1 ? 1 - : 0; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t maxrandshift = mask; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = topspare + (xtypebits + maxrandshift) / 2; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> xshift; - xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); - return result; - } - }; - - /* - * XSH RR -- high xorshift, followed by a random rotate - * - * Fast. A good performer. Slightly better statistically than XSH RS. 
- */ - - template - struct xsh_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = - xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = - sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask - : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } - }; - - /* - * RXS -- random xorshift - */ - - template - struct rxs_mixin { - static xtype output_rxs(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t extrashift = (xtypebits - shift) / 2; - bitcount_t rshift = shift > 64 + 8 ? (internal >> (bits - 6)) & 63 - : shift > 32 + 4 ? (internal >> (bits - 5)) & 31 - : shift > 16 + 2 ? (internal >> (bits - 4)) & 15 - : shift > 8 + 1 ? (internal >> (bits - 3)) & 7 - : shift > 4 + 1 ? (internal >> (bits - 2)) & 3 - : shift > 2 + 1 ? (internal >> (bits - 1)) & 1 - : 0; - internal ^= internal >> (shift + extrashift - rshift); - xtype result = internal >> rshift; - return result; - } - }; - - /* - * RXS M XS -- random xorshift, mcg multiply, fixed xorshift - * - * The most statistically powerful generator, but all those steps - * make it slower than some of the others. We give it the rottenest jobs. - * - * Because it's usually used in contexts where the state type and the - * result type are the same, it is a permutation and is thus invertable. - * We thus provide a function to invert it. This function is used to - * for the "inside out" generator used by the extended generator. - */ - - /* Defined type-based concepts for the multiplication step. They're actually - * all derived by truncating the 128-bit, which was computed to be a good - * "universal" constant. 
- */ - - template - struct mcg_multiplier { - // Not defined for an arbitrary type - }; - - template - struct mcg_unmultiplier { - // Not defined for an arbitrary type - }; - - PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) - PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) - - PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) - PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) - - PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) - PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) - - PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) - PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) - - PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, - PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) - PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, - PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) - - - template - struct rxs_m_xs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - result ^= result >> ((2U * xtypebits + 2U) / 3U); - return result; - } - - static itype unoutput(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = bits >= 128 ? 6 - : bits >= 64 ? 5 - : bits >= 32 ? 4 - : bits >= 16 ? 3 - : 2; - constexpr bitcount_t mask = (1 << opbits) - 1; - - internal = unxorshift(internal, bits, (2U * bits + 2U) / 3U); - - internal *= mcg_unmultiplier::unmultiplier(); - - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal = unxorshift(internal, bits, opbits + rshift); - - return internal; - } - }; - - - /* - * RXS M -- random xorshift, mcg multiply - */ - - template - struct rxs_m_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - return result; - } - }; - - - /* - * DXSM -- double xorshift multiply - * - * This is a new, more powerful output permutation (added in 2019). It's - * a more comprehensive scrambling than RXS M, but runs faster on 128-bit - * types. Although primarily intended for use at large sizes, also works - * at smaller sizes as well. - * - * This permutation is similar to xorshift multiply hash functions, except - * that one of the multipliers is the LCG multiplier (to avoid needing to - * have a second constant) and the other is based on the low-order bits. 
- * This latter aspect means that the scrambling applied to the high bits - * depends on the low bits, and makes it (to my eye) impractical to back - * out the permutation without having the low-order bits. - */ - - template - struct dxsm_mixin { - inline xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t itypebits = bitcount_t(sizeof(itype) * 8); - static_assert(xtypebits <= itypebits / 2, - "Output type must be half the size of the state type."); - - xtype hi = xtype(internal >> (itypebits - xtypebits)); - xtype lo = xtype(internal); - - lo |= 1; - hi ^= hi >> (xtypebits / 2); - hi *= xtype(cheap_multiplier::multiplier()); - hi ^= hi >> (3 * (xtypebits / 4)); - hi *= lo; - return hi; - } - }; - - - /* - * XSL RR -- fixed xorshift (to low bits), random rotate - * - * Useful for 128-bit types that are split across two CPU registers. - */ - - template - struct xsl_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - bitcount_t rot = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } - }; - - - /* - * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) - * - * Useful for 128-bit types that are split across two CPU registers. - * If you really want an invertable 128-bit RNG, I guess this is the one. - */ - - template struct halfsize_trait {}; - template <> struct halfsize_trait { typedef uint64_t type; }; - template <> struct halfsize_trait { typedef uint32_t type; }; - template <> struct halfsize_trait { typedef uint16_t type; }; - template <> struct halfsize_trait { typedef uint8_t type; }; - - template - struct xsl_rr_rr_mixin { - typedef typename halfsize_trait::type htype; - - static itype output(itype internal) - { - constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - htypebits; - constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 - : htypebits >= 64 ? 6 - : htypebits >= 32 ? 5 - : htypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t xshift = (topspare + htypebits) / 2; - - bitcount_t rot = - opbits ? 
bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - htype lowbits = htype(internal); - lowbits = rotr(lowbits, amprot); - htype highbits = htype(internal >> topspare); - bitcount_t rot2 = lowbits & mask; - bitcount_t amprot2 = (rot2 << amplifier) & mask; - highbits = rotr(highbits, amprot2); - return (itype(highbits) << topspare) ^ itype(lowbits); - } - }; - - - /* - * XSH -- fixed xorshift (to high bits) - * - * You shouldn't use this at 64-bits or less. - */ - - template - struct xsh_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = 0; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } - }; - - /* - * XSL -- fixed xorshift (to low bits) - * - * You shouldn't use this at 64-bits or less. - */ - - template - struct xsl_mixin { - inline xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } - }; - - - /* ---- End of Output Functions ---- */ - - - template - struct inside_out : private baseclass { - inside_out() = delete; - - typedef typename baseclass::result_type result_type; - typedef typename baseclass::state_type state_type; - static_assert(sizeof(result_type) == sizeof(state_type), - "Require a RNG whose output function is a permutation"); - - static bool external_step(result_type& randval, size_t i) - { - state_type state = baseclass::unoutput(randval); - state = state * baseclass::multiplier() + baseclass::increment() - + state_type(i * 2); - result_type result = baseclass::output(state); - randval = result; - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - return result == zero; - } - - static bool external_advance(result_type& randval, size_t i, - result_type delta, bool forwards = true) - { - state_type state = baseclass::unoutput(randval); - state_type mult = baseclass::multiplier(); - state_type inc = baseclass::increment() + state_type(i * 2); - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); - bool crosses_zero = - forwards ? 
dist_to_zero <= delta - : (-dist_to_zero) <= delta; - if (!forwards) - delta = -delta; - state = baseclass::advance(state, delta, mult, inc); - randval = baseclass::output(state); - return crosses_zero; - } - }; - - - template - class extended : public baseclass { - public: - typedef typename baseclass::state_type state_type; - typedef typename baseclass::result_type result_type; - typedef inside_out insideout; - - private: - static constexpr bitcount_t rtypebits = sizeof(result_type) * 8; - static constexpr bitcount_t stypebits = sizeof(state_type) * 8; - - static constexpr bitcount_t tick_limit_pow2 = 64U; - - static constexpr size_t table_size = 1UL << table_pow2; - static constexpr size_t table_shift = stypebits - table_pow2; - static constexpr state_type table_mask = - (state_type(1U) << table_pow2) - state_type(1U); - - static constexpr bool may_tick = - (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); - static constexpr size_t tick_shift = stypebits - advance_pow2; - static constexpr state_type tick_mask = - may_tick ? state_type( - (uint64_t(1) << (advance_pow2 * may_tick)) - 1) - // ^-- stupidity to appease GCC warnings - : ~state_type(0U); - - static constexpr bool may_tock = stypebits < tick_limit_pow2; - - result_type data_[table_size]; - - PCG_NOINLINE void advance_table(); - - PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); - - result_type& get_extended_value() - { - state_type state = this->state_; - if (kdd && baseclass::is_mcg) { - // The low order bits of an MCG are constant, so drop them. - state >>= 2; - } - size_t index = kdd ? state & table_mask - : state >> table_shift; - - if (may_tick) { - bool tick = kdd ? (state & tick_mask) == state_type(0u) - : (state >> tick_shift) == state_type(0u); - if (tick) - advance_table(); - } - if (may_tock) { - bool tock = state == state_type(0u); - if (tock) - advance_table(); - } - return data_[index]; - } - - public: - static constexpr size_t period_pow2() - { - return baseclass::period_pow2() + table_size * extvalclass::period_pow2(); - } - - PCG_ALWAYS_INLINE result_type operator()() - { - result_type rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - return lhs ^ rhs; - } - - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - - void set(result_type wanted) - { - result_type& rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - rhs = lhs ^ wanted; - } - - void advance(state_type distance, bool forwards = true); - - void backstep(state_type distance) - { - advance(distance, false); - } - - extended(const result_type* data) - : baseclass() - { - datainit(data); - } - - extended(const result_type* data, state_type seed) - : baseclass(seed) - { - datainit(data); - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - extended(const result_type* data, state_type seed, - typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - datainit(data); - } - - extended() - : baseclass() - { - selfinit(); - } - - extended(state_type seed) - : baseclass(seed) - { - selfinit(); - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. 
- - template - extended(state_type seed, typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - selfinit(); - } - - private: - void selfinit(); - void datainit(const result_type* data); - - public: - - template::value - && !std::is_convertible::value>::type> - extended(SeedSeq&& seedSeq) - : baseclass(seedSeq) - { - generate_to(seedSeq, data_); - } - - template - void seed(Args&&... args) - { - new (this) extended(std::forward(args)...); - } - - template - friend bool operator==(const extended&, - const extended&); - - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const extended&); - - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - extended&); - - }; - - - template - void extended::datainit( - const result_type* data) - { - for (size_t i = 0; i < table_size; ++i) - data_[i] = data[i]; - } - - template - void extended::selfinit() - { - // We need to fill the extended table with something, and we have - // very little provided data, so we use the base generator to - // produce values. Although not ideal (use a seed sequence, folks!), - // unexpected correlations are mitigated by - // - using XOR differences rather than the number directly - // - the way the table is accessed, its values *won't* be accessed - // in the same order the were written. - // - any strange correlations would only be apparent if we - // were to backstep the generator so that the base generator - // was generating the same values again - result_type lhs = baseclass::operator()(); - result_type rhs = baseclass::operator()(); - result_type xdiff = lhs - rhs; - for (size_t i = 0; i < table_size; ++i) { - data_[i] = baseclass::operator()() ^ xdiff; - } - } - - template - bool operator==(const extended& lhs, - const extended& rhs) - { - auto& base_lhs = static_cast(lhs); - auto& base_rhs = static_cast(rhs); - return base_lhs == base_rhs - && std::equal( - std::begin(lhs.data_), std::end(lhs.data_), - std::begin(rhs.data_) - ); - } - - template - inline bool operator!=(const extended& lhs, - const extended& rhs) - { - return !operator==(lhs, rhs); - } - - template - std::basic_ostream& - operator<<(std::basic_ostream& out, - const extended& rng) - { - using pcg_extras::operator<<; - - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); - - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; - - for (const auto& datum : rng.data_) - out << space << datum; - - out.flags(orig_flags); - out.fill(orig_fill); - return out; - } - - template - std::basic_istream& - operator>>(std::basic_istream& in, - extended& rng) - { - extended new_rng; - auto& base_rng = static_cast(new_rng); - in >> base_rng; - - if (in.fail()) - return in; - - using pcg_extras::operator>>; - - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); - - for (auto& datum : new_rng.data_) { - in >> datum; - if (in.fail()) - goto bail; - } - - rng = new_rng; - - bail: - in.flags(orig_flags); - return in; - } - - - - template - void - extended::advance_table() - { - bool carry = false; - for (size_t i = 0; i < table_size; ++i) { - if (carry) { - carry = insideout::external_step(data_[i], i + 1); - } - bool carry2 = insideout::external_step(data_[i], i + 1); - carry = carry || carry2; - } - } - - template - void - extended::advance_table( - state_type delta, bool isForwards) - { - typedef typename baseclass::state_type base_state_t; - typedef typename 
extvalclass::state_type ext_state_t; - constexpr bitcount_t basebits = sizeof(base_state_t) * 8; - constexpr bitcount_t extbits = sizeof(ext_state_t) * 8; - static_assert(basebits <= extbits || advance_pow2 > 0, - "Current implementation might overflow its carry"); - - base_state_t carry = 0; - for (size_t i = 0; i < table_size; ++i) { - base_state_t total_delta = carry + delta; - ext_state_t trunc_delta = ext_state_t(total_delta); - if (basebits > extbits) { - carry = total_delta >> extbits; - } - else { - carry = 0; - } - carry += - insideout::external_advance(data_[i], i + 1, trunc_delta, isForwards); - } - } - - template - void extended::advance( - state_type distance, bool forwards) - { - static_assert(kdd, - "Efficient advance is too hard for non-kdd extension. " - "For a weak advance, cast to base class"); - state_type zero = - baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); - if (may_tick) { - state_type ticks = distance >> (advance_pow2 * may_tick); - // ^-- stupidity to appease GCC - // warnings - state_type adv_mask = - baseclass::is_mcg ? tick_mask << 2 : tick_mask; - state_type next_advance_distance = this->distance(zero, adv_mask); - if (!forwards) - next_advance_distance = (-next_advance_distance) & tick_mask; - if (next_advance_distance < (distance & tick_mask)) { - ++ticks; - } - if (ticks) - advance_table(ticks, forwards); - } - if (forwards) { - if (may_tock && this->distance(zero) <= distance) - advance_table(); - baseclass::advance(distance); - } - else { - if (may_tock && -(this->distance(zero)) <= distance) - advance_table(state_type(1U), false); - baseclass::advance(-distance); - } - } - -} // namespace pcg_detail - -namespace pcg_engines { - - using namespace pcg_detail; - - /* Predefined types for XSH RS */ - - typedef oneseq_base oneseq_xsh_rs_16_8; - typedef oneseq_base oneseq_xsh_rs_32_16; - typedef oneseq_base oneseq_xsh_rs_64_32; - typedef oneseq_base oneseq_xsh_rs_128_64; - typedef oneseq_base - cm_oneseq_xsh_rs_128_64; - - typedef unique_base unique_xsh_rs_16_8; - typedef unique_base unique_xsh_rs_32_16; - typedef unique_base unique_xsh_rs_64_32; - typedef unique_base unique_xsh_rs_128_64; - typedef unique_base - cm_unique_xsh_rs_128_64; - - typedef setseq_base setseq_xsh_rs_16_8; - typedef setseq_base setseq_xsh_rs_32_16; - typedef setseq_base setseq_xsh_rs_64_32; - typedef setseq_base setseq_xsh_rs_128_64; - typedef setseq_base - cm_setseq_xsh_rs_128_64; - - typedef mcg_base mcg_xsh_rs_16_8; - typedef mcg_base mcg_xsh_rs_32_16; - typedef mcg_base mcg_xsh_rs_64_32; - typedef mcg_base mcg_xsh_rs_128_64; - typedef mcg_base - cm_mcg_xsh_rs_128_64; - - /* Predefined types for XSH RR */ - - typedef oneseq_base oneseq_xsh_rr_16_8; - typedef oneseq_base oneseq_xsh_rr_32_16; - typedef oneseq_base oneseq_xsh_rr_64_32; - typedef oneseq_base oneseq_xsh_rr_128_64; - typedef oneseq_base - cm_oneseq_xsh_rr_128_64; - - typedef unique_base unique_xsh_rr_16_8; - typedef unique_base unique_xsh_rr_32_16; - typedef unique_base unique_xsh_rr_64_32; - typedef unique_base unique_xsh_rr_128_64; - typedef unique_base - cm_unique_xsh_rr_128_64; - - typedef setseq_base setseq_xsh_rr_16_8; - typedef setseq_base setseq_xsh_rr_32_16; - typedef setseq_base setseq_xsh_rr_64_32; - typedef setseq_base setseq_xsh_rr_128_64; - typedef setseq_base - cm_setseq_xsh_rr_128_64; - - typedef mcg_base mcg_xsh_rr_16_8; - typedef mcg_base mcg_xsh_rr_32_16; - typedef mcg_base mcg_xsh_rr_64_32; - typedef mcg_base mcg_xsh_rr_128_64; - typedef mcg_base - cm_mcg_xsh_rr_128_64; - - - /* 
Predefined types for RXS M XS */ - - typedef oneseq_base oneseq_rxs_m_xs_8_8; - typedef oneseq_base oneseq_rxs_m_xs_16_16; - typedef oneseq_base oneseq_rxs_m_xs_32_32; - typedef oneseq_base oneseq_rxs_m_xs_64_64; - typedef oneseq_base - oneseq_rxs_m_xs_128_128; - typedef oneseq_base - cm_oneseq_rxs_m_xs_128_128; - - typedef unique_base unique_rxs_m_xs_8_8; - typedef unique_base unique_rxs_m_xs_16_16; - typedef unique_base unique_rxs_m_xs_32_32; - typedef unique_base unique_rxs_m_xs_64_64; - typedef unique_base unique_rxs_m_xs_128_128; - typedef unique_base - cm_unique_rxs_m_xs_128_128; - - typedef setseq_base setseq_rxs_m_xs_8_8; - typedef setseq_base setseq_rxs_m_xs_16_16; - typedef setseq_base setseq_rxs_m_xs_32_32; - typedef setseq_base setseq_rxs_m_xs_64_64; - typedef setseq_base setseq_rxs_m_xs_128_128; - typedef setseq_base - cm_setseq_rxs_m_xs_128_128; - - // MCG versions don't make sense here, so aren't defined. - -/* Predefined types for RXS M */ - - typedef oneseq_base oneseq_rxs_m_16_8; - typedef oneseq_base oneseq_rxs_m_32_16; - typedef oneseq_base oneseq_rxs_m_64_32; - typedef oneseq_base oneseq_rxs_m_128_64; - typedef oneseq_base - cm_oneseq_rxs_m_128_64; - - typedef unique_base unique_rxs_m_16_8; - typedef unique_base unique_rxs_m_32_16; - typedef unique_base unique_rxs_m_64_32; - typedef unique_base unique_rxs_m_128_64; - typedef unique_base - cm_unique_rxs_m_128_64; - - typedef setseq_base setseq_rxs_m_16_8; - typedef setseq_base setseq_rxs_m_32_16; - typedef setseq_base setseq_rxs_m_64_32; - typedef setseq_base setseq_rxs_m_128_64; - typedef setseq_base - cm_setseq_rxs_m_128_64; - - typedef mcg_base mcg_rxs_m_16_8; - typedef mcg_base mcg_rxs_m_32_16; - typedef mcg_base mcg_rxs_m_64_32; - typedef mcg_base mcg_rxs_m_128_64; - typedef mcg_base - cm_mcg_rxs_m_128_64; - - /* Predefined types for DXSM */ - - typedef oneseq_base oneseq_dxsm_16_8; - typedef oneseq_base oneseq_dxsm_32_16; - typedef oneseq_base oneseq_dxsm_64_32; - typedef oneseq_base oneseq_dxsm_128_64; - typedef oneseq_base - cm_oneseq_dxsm_128_64; - - typedef unique_base unique_dxsm_16_8; - typedef unique_base unique_dxsm_32_16; - typedef unique_base unique_dxsm_64_32; - typedef unique_base unique_dxsm_128_64; - typedef unique_base - cm_unique_dxsm_128_64; - - typedef setseq_base setseq_dxsm_16_8; - typedef setseq_base setseq_dxsm_32_16; - typedef setseq_base setseq_dxsm_64_32; - typedef setseq_base setseq_dxsm_128_64; - typedef setseq_base - cm_setseq_dxsm_128_64; - - typedef mcg_base mcg_dxsm_16_8; - typedef mcg_base mcg_dxsm_32_16; - typedef mcg_base mcg_dxsm_64_32; - typedef mcg_base mcg_dxsm_128_64; - typedef mcg_base - cm_mcg_dxsm_128_64; - - /* Predefined types for XSL RR (only defined for "large" types) */ - - typedef oneseq_base oneseq_xsl_rr_64_32; - typedef oneseq_base oneseq_xsl_rr_128_64; - typedef oneseq_base - cm_oneseq_xsl_rr_128_64; - - typedef unique_base unique_xsl_rr_64_32; - typedef unique_base unique_xsl_rr_128_64; - typedef unique_base - cm_unique_xsl_rr_128_64; - - typedef setseq_base setseq_xsl_rr_64_32; - typedef setseq_base setseq_xsl_rr_128_64; - typedef setseq_base - cm_setseq_xsl_rr_128_64; - - typedef mcg_base mcg_xsl_rr_64_32; - typedef mcg_base mcg_xsl_rr_128_64; - typedef mcg_base - cm_mcg_xsl_rr_128_64; - - - /* Predefined types for XSL RR RR (only defined for "large" types) */ - - typedef oneseq_base - oneseq_xsl_rr_rr_64_64; - typedef oneseq_base - oneseq_xsl_rr_rr_128_128; - typedef oneseq_base - cm_oneseq_xsl_rr_rr_128_128; - - typedef unique_base - unique_xsl_rr_rr_64_64; 
- typedef unique_base - unique_xsl_rr_rr_128_128; - typedef unique_base - cm_unique_xsl_rr_rr_128_128; - - typedef setseq_base - setseq_xsl_rr_rr_64_64; - typedef setseq_base - setseq_xsl_rr_rr_128_128; - typedef setseq_base - cm_setseq_xsl_rr_rr_128_128; - - // MCG versions don't make sense here, so aren't defined. - -/* Extended generators */ - - template - using ext_std8 = extended; - - template - using ext_std16 = extended; - - template - using ext_std32 = extended; - - template - using ext_std64 = extended; - - - template - using ext_oneseq_rxs_m_xs_32_32 = - ext_std32; - - template - using ext_mcg_xsh_rs_64_32 = - ext_std32; - - template - using ext_oneseq_xsh_rs_64_32 = - ext_std32; - - template - using ext_setseq_xsh_rr_64_32 = - ext_std32; - - template - using ext_mcg_xsl_rr_128_64 = - ext_std64; - - template - using ext_oneseq_xsl_rr_128_64 = - ext_std64; - - template - using ext_setseq_xsl_rr_128_64 = - ext_std64; - -} // namespace pcg_engines - -typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; -typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; -typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; -typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; - -typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; -typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; -typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; -typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; - -typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; -typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; - -typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; -typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; - - -// These two extended RNGs provide two-dimensionally equidistributed -// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, -// and can be called twice to generate 64 bits, but does not required -// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. 
- -typedef pcg_engines::ext_setseq_xsh_rr_64_32<1, 16, true> pcg32_k2; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1, 32, true> pcg32_k2_fast; - -// These eight extended RNGs have about as much state as arc4random -// -// - the k variants are k-dimensionally equidistributed -// - the c variants offer are intended to be harder to predict -// -// (neither is intended for use in cryptographic applications) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6, 16, true> pcg32_k64; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6, 32, true> pcg32_k64_oneseq; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6, 32, true> pcg32_k64_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6, 16, false> pcg32_c64; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6, 32, false> pcg32_c64_oneseq; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6, 32, false> pcg32_c64_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5, 16, true> pcg64_k32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5, 128, true> pcg64_k32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5, 128, true> pcg64_k32_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5, 16, false> pcg64_c32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5, 128, false> pcg64_c32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5, 128, false> pcg64_c32_fast; - -// These eight extended RNGs have more state than the Mersenne twister -// -// - the k variants are k-dimensionally equidistributed -// - the c variants offer are intended to be harder to predict -// -// (neither is intended for use in cryptographic applications) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10, 16, true> pcg32_k1024; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10, 32, true> pcg32_k1024_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10, 16, false> pcg32_c1024; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10, 32, false> pcg32_c1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10, 16, true> pcg64_k1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10, 128, true> pcg64_k1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10, 16, false> pcg64_c1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10, 128, false> pcg64_c1024_fast; - -// These generators have an insanely huge period (2^524352), and is suitable -// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary -// point in the future. [Actually, over the full period of the generator, it -// will produce every 64 KB ZIP file 2^64 times!] - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<14, 16, true> pcg32_k16384; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14, 32, true> pcg32_k16384_fast; - -#ifdef _MSC_VER -#pragma warning(default:4146) -#endif - -#endif // PCG_RAND_HPP_INCLUDED \ No newline at end of file diff --git a/include/pcg_uint128.hpp b/include/pcg_uint128.hpp deleted file mode 100644 index 75bc601..0000000 --- a/include/pcg_uint128.hpp +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014-2021 Melissa O'Neill , - * and the PCG Project contributors. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - * - * Licensed under the Apache License, Version 2.0 (provided in - * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) - * or under the MIT license (provided in LICENSE-MIT.txt and at - * http://opensource.org/licenses/MIT), at your option. This file may not - * be copied, modified, or distributed except according to those terms. 
- * - * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either - * express or implied. See your chosen license for details. - * - * For additional information about the PCG random number generation scheme, - * visit http://www.pcg-random.org/. - */ - - /* - * This code provides a a C++ class that can provide 128-bit (or higher) - * integers. To produce 2K-bit integers, it uses two K-bit integers, - * placed in a union that allowes the code to also see them as four K/2 bit - * integers (and access them either directly name, or by index). - * - * It may seem like we're reinventing the wheel here, because several - * libraries already exist that support large integers, but most existing - * libraries provide a very generic multiprecision code, but here we're - * operating at a fixed size. Also, most other libraries are fairly - * heavyweight. So we use a direct implementation. Sadly, it's much slower - * than hand-coded assembly or direct CPU support. - */ - -#ifndef PCG_UINT128_HPP_INCLUDED -#define PCG_UINT128_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include - -#if defined(_MSC_VER) // Use MSVC++ intrinsics -#include -#endif - - /* - * We want to lay the type out the same way that a native type would be laid - * out, which means we must know the machine's endian, at compile time. - * This ugliness attempts to do so. - */ - -#ifndef PCG_LITTLE_ENDIAN -#if defined(__BYTE_ORDER__) -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define PCG_LITTLE_ENDIAN 1 -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define PCG_LITTLE_ENDIAN 0 -#else -#error __BYTE_ORDER__ does not match a standard endian, pick a side -#endif -#elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN -#define PCG_LITTLE_ENDIAN 1 -#elif __BIG_ENDIAN__ || _BIG_ENDIAN -#define PCG_LITTLE_ENDIAN 0 -#elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86 -#define PCG_LITTLE_ENDIAN 1 -#elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \ - || __m68k__ || __mc68000__ -#define PCG_LITTLE_ENDIAN 0 -#else -#error Unable to determine target endianness -#endif -#endif - -#if INTPTR_MAX == INT64_MAX && !defined(PCG_64BIT_SPECIALIZATIONS) -#define PCG_64BIT_SPECIALIZATIONS 1 -#endif - -namespace pcg_extras { - - // Recent versions of GCC have intrinsics we can use to quickly calculate - // the number of leading and trailing zeros in a number. If possible, we - // use them, otherwise we fall back to old-fashioned bit twiddling to figure - // them out. - -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif - - /* - * Provide some useful helper functions - * * flog2 floor(log2(x)) - * * trailingzeros number of trailing zero bits - */ - -#if defined(__GNUC__) // Any GNU-compatible compiler supporting C++11 has - // some useful intrinsics we can use. 
- - inline bitcount_t flog2(uint32_t v) - { - return 31 - __builtin_clz(v); - } - - inline bitcount_t trailingzeros(uint32_t v) - { - return __builtin_ctz(v); - } - - inline bitcount_t flog2(uint64_t v) - { -#if UINT64_MAX == ULONG_MAX - return 63 - __builtin_clzl(v); -#elif UINT64_MAX == ULLONG_MAX - return 63 - __builtin_clzll(v); -#else -#error Cannot find a function for uint64_t -#endif - } - - inline bitcount_t trailingzeros(uint64_t v) - { -#if UINT64_MAX == ULONG_MAX - return __builtin_ctzl(v); -#elif UINT64_MAX == ULLONG_MAX - return __builtin_ctzll(v); -#else -#error Cannot find a function for uint64_t -#endif - } - -#elif defined(_MSC_VER) // Use MSVC++ intrinsics - -#pragma intrinsic(_BitScanReverse, _BitScanForward) -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) -#pragma intrinsic(_BitScanReverse64, _BitScanForward64) -#endif - - inline bitcount_t flog2(uint32_t v) - { - unsigned long i; - _BitScanReverse(&i, v); - return bitcount_t(i); - } - - inline bitcount_t trailingzeros(uint32_t v) - { - unsigned long i; - _BitScanForward(&i, v); - return bitcount_t(i); - } - - inline bitcount_t flog2(uint64_t v) - { -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) - unsigned long i; - _BitScanReverse64(&i, v); - return bitcount_t(i); -#else - // 32-bit x86 - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - return high ? 32 + flog2(high) : flog2(low); -#endif - } - - inline bitcount_t trailingzeros(uint64_t v) - { -#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64) - unsigned long i; - _BitScanForward64(&i, v); - return bitcount_t(i); -#else - // 32-bit x86 - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - return low ? trailingzeros(low) : trailingzeros(high) + 32; -#endif - } - -#else // Otherwise, we fall back to bit twiddling - // implementations - - inline bitcount_t flog2(uint32_t v) - { - // Based on code by Eric Cole and Mark Dickinson, which appears at - // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn - - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 - }; - - v |= v >> 1; // first round down to one less than a power of 2 - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27]; - } - - inline bitcount_t trailingzeros(uint32_t v) - { - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - - return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27]; - } - - inline bitcount_t flog2(uint64_t v) - { - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - - return high ? 32 + flog2(high) : flog2(low); - } - - inline bitcount_t trailingzeros(uint64_t v) - { - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - - return low ? trailingzeros(low) : trailingzeros(high) + 32; - } - -#endif - - inline bitcount_t flog2(uint8_t v) - { - return flog2(uint32_t(v)); - } - - inline bitcount_t flog2(uint16_t v) - { - return flog2(uint32_t(v)); - } - -#if __SIZEOF_INT128__ - inline bitcount_t flog2(__uint128_t v) - { - uint64_t high = uint64_t(v >> 64); - uint64_t low = uint64_t(v); - - return high ? 
64 + flog2(high) : flog2(low); - } -#endif - - inline bitcount_t trailingzeros(uint8_t v) - { - return trailingzeros(uint32_t(v)); - } - - inline bitcount_t trailingzeros(uint16_t v) - { - return trailingzeros(uint32_t(v)); - } - -#if __SIZEOF_INT128__ - inline bitcount_t trailingzeros(__uint128_t v) - { - uint64_t high = uint64_t(v >> 64); - uint64_t low = uint64_t(v); - return low ? trailingzeros(low) : trailingzeros(high) + 64; - } -#endif - - template - inline bitcount_t clog2(UInt v) - { - return flog2(v) + ((v & (-v)) != v); - } - - template - inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout) - { - UInt half_result = y + carryin; - UInt result = x + half_result; - *carryout = (half_result < y) || (result < x); - return result; - } - - template - inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout) - { - UInt half_result = y + carryin; - UInt result = x - half_result; - *carryout = (half_result < y) || (result > x); - return result; - } - - - template - class uint_x4 { - // private: - static constexpr unsigned int UINT_BITS = sizeof(UInt) * CHAR_BIT; - public: - union { -#if PCG_LITTLE_ENDIAN - struct { - UInt v0, v1, v2, v3; - } w; - struct { - UIntX2 v01, v23; - } d; -#else - struct { - UInt v3, v2, v1, v0; - } w; - struct { - UIntX2 v23, v01; - } d; -#endif - // For the array access versions, the code that uses the array - // must handle endian itself. Yuck. - UInt wa[4]; - }; - - public: - uint_x4() = default; - - constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0) -#if PCG_LITTLE_ENDIAN - : w{ v0, v1, v2, v3 } -#else - : w{ v3, v2, v1, v0 } -#endif - { - // Nothing (else) to do - } - - constexpr uint_x4(UIntX2 v23, UIntX2 v01) -#if PCG_LITTLE_ENDIAN - : d{ v01,v23 } -#else - : d{ v23,v01 } -#endif - { - // Nothing (else) to do - } - - constexpr uint_x4(UIntX2 v01) -#if PCG_LITTLE_ENDIAN - : d{ v01, UIntX2(0) } -#else - : d{ UIntX2(0),v01 } -#endif - { - // Nothing (else) to do - } - - template::value - && sizeof(Integral) <= sizeof(UIntX2)) - >::type* = nullptr> - constexpr uint_x4(Integral v01) -#if PCG_LITTLE_ENDIAN - : d{ UIntX2(v01), UIntX2(0) } -#else - : d{ UIntX2(0), UIntX2(v01) } -#endif - { - // Nothing (else) to do - } - - explicit constexpr operator UIntX2() const - { - return d.v01; - } - - template::value - && sizeof(Integral) <= sizeof(UIntX2)) - >::type* = nullptr> - explicit constexpr operator Integral() const - { - return Integral(d.v01); - } - - explicit constexpr operator bool() const - { - return d.v01 || d.v23; - } - - template - friend uint_x4 operator*(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator*(const uint_x4&, V); - - template - friend std::pair< uint_x4, uint_x4 > - divmod(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator+(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator-(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator<<(const uint_x4&, const bitcount_t shift); - - template - friend uint_x4 operator>>(const uint_x4&, const bitcount_t shift); - -#if PCG_64BIT_SPECIALIZATIONS - template - friend uint_x4 operator<<(const uint_x4&, const bitcount_t shift); - - template - friend uint_x4 operator>>(const uint_x4&, const bitcount_t shift); -#endif - - template - friend uint_x4 operator&(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator|(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator^(const uint_x4&, const uint_x4&); - - template - friend bool operator==(const uint_x4&, const 
uint_x4&); - - template - friend bool operator!=(const uint_x4&, const uint_x4&); - - template - friend bool operator<(const uint_x4&, const uint_x4&); - - template - friend bool operator<=(const uint_x4&, const uint_x4&); - - template - friend bool operator>(const uint_x4&, const uint_x4&); - - template - friend bool operator>=(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator~(const uint_x4&); - - template - friend uint_x4 operator-(const uint_x4&); - - template - friend bitcount_t flog2(const uint_x4&); - - template - friend bitcount_t trailingzeros(const uint_x4&); - -#if PCG_64BIT_SPECIALIZATIONS - template - friend bitcount_t flog2(const uint_x4&); - - template - friend bitcount_t trailingzeros(const uint_x4&); -#endif - - uint_x4& operator*=(const uint_x4& rhs) - { - uint_x4 result = *this * rhs; - return *this = result; - } - - uint_x4& operator*=(UIntX2 rhs) - { - uint_x4 result = *this * rhs; - return *this = result; - } - - uint_x4& operator/=(const uint_x4& rhs) - { - uint_x4 result = *this / rhs; - return *this = result; - } - - uint_x4& operator%=(const uint_x4& rhs) - { - uint_x4 result = *this % rhs; - return *this = result; - } - - uint_x4& operator+=(const uint_x4& rhs) - { - uint_x4 result = *this + rhs; - return *this = result; - } - - uint_x4& operator-=(const uint_x4& rhs) - { - uint_x4 result = *this - rhs; - return *this = result; - } - - uint_x4& operator&=(const uint_x4& rhs) - { - uint_x4 result = *this & rhs; - return *this = result; - } - - uint_x4& operator|=(const uint_x4& rhs) - { - uint_x4 result = *this | rhs; - return *this = result; - } - - uint_x4& operator^=(const uint_x4& rhs) - { - uint_x4 result = *this ^ rhs; - return *this = result; - } - - uint_x4& operator>>=(bitcount_t shift) - { - uint_x4 result = *this >> shift; - return *this = result; - } - - uint_x4& operator<<=(bitcount_t shift) - { - uint_x4 result = *this << shift; - return *this = result; - } - - }; - - template - bitcount_t flog2(const uint_x4& v) - { -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 4; i != 0; /* dec in loop */) { - --i; -#else - for (uint8_t i = 0; i < 4; ++i) { -#endif - if (v.wa[i] == 0) - continue; - return flog2(v.wa[i]) + uint_x4::UINT_BITS* i; - } - abort(); - } - - template - bitcount_t trailingzeros(const uint_x4&v) - { -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 0; i < 4; ++i) { -#else - for (uint8_t i = 4; i != 0; /* dec in loop */) { - --i; -#endif - if (v.wa[i] != 0) - return trailingzeros(v.wa[i]) + uint_x4::UINT_BITS* i; - } - return uint_x4::UINT_BITS * 4; - } - -#if PCG_64BIT_SPECIALIZATIONS - template - bitcount_t flog2(const uint_x4&v) - { - return v.d.v23 > 0 ? flog2(v.d.v23) + uint_x4::UINT_BITS * 2 - : flog2(v.d.v01); - } - - template - bitcount_t trailingzeros(const uint_x4&v) - { - return v.d.v01 == 0 ? trailingzeros(v.d.v23) + uint_x4::UINT_BITS * 2 - : trailingzeros(v.d.v01); - } -#endif - - template - std::pair< uint_x4, uint_x4 > - divmod(const uint_x4&orig_dividend, - const uint_x4&divisor) - { - // If the dividend is less than the divisor, the answer is always zero. - // This takes care of boundary cases like 0/x (which would otherwise be - // problematic because we can't take the log of zero. (The boundary case - // of division by zero is undefined.) 
- if (orig_dividend < divisor) - return { uint_x4(UIntX2(0)), orig_dividend }; - - auto dividend = orig_dividend; - - auto log2_divisor = flog2(divisor); - auto log2_dividend = flog2(dividend); - // assert(log2_dividend >= log2_divisor); - bitcount_t logdiff = log2_dividend - log2_divisor; - - constexpr uint_x4 ONE(UIntX2(1)); - if (logdiff == 0) - return { ONE, dividend - divisor }; - - // Now we change the log difference to - // floor(log2(divisor)) - ceil(log2(dividend)) - // to ensure that we *underestimate* the result. - logdiff -= 1; - - uint_x4 quotient(UIntX2(0)); - - auto qfactor = ONE << logdiff; - auto factor = divisor << logdiff; - - do { - dividend -= factor; - quotient += qfactor; - while (dividend < factor) { - factor >>= 1; - qfactor >>= 1; - } - } while (dividend >= divisor); - - return { quotient, dividend }; - } - - template - uint_x4 operator/(const uint_x4÷nd, - const uint_x4&divisor) - { - return divmod(dividend, divisor).first; - } - - template - uint_x4 operator%(const uint_x4÷nd, - const uint_x4&divisor) - { - return divmod(dividend, divisor).second; - } - - - template - uint_x4 operator*(const uint_x4&a, - const uint_x4&b) - { - constexpr auto UINT_BITS = uint_x4::UINT_BITS; - uint_x4 r = { 0U, 0U, 0U, 0U }; - bool carryin = false; - bool carryout; - UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0); - r.w.v0 = UInt(a0b0); - r.w.v1 = UInt(a0b0 >> UINT_BITS); - - UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0); - r.w.v2 = UInt(a1b0 >> UINT_BITS); - r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - carryin = false; - r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); - - r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01; - - return r; - } - - - template - uint_x4 operator*(const uint_x4&a, - UIntX2 b01) - { - constexpr auto UINT_BITS = uint_x4::UINT_BITS; - uint_x4 r = { 0U, 0U, 0U, 0U }; - bool carryin = false; - bool carryout; - UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(UInt(b01)); - r.w.v0 = UInt(a0b0); - r.w.v1 = UInt(a0b0 >> UINT_BITS); - - UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(UInt(b01)); - r.w.v2 = UInt(a1b0 >> UINT_BITS); - r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b01 >> UINT_BITS); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - carryin = false; - r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, 
&carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b01 >> UINT_BITS); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout); - - r.d.v23 += a.d.v23 * b01; - - return r; - } - -#if PCG_64BIT_SPECIALIZATIONS -#if defined(_MSC_VER) -#pragma intrinsic(_umul128) -#endif - -#if defined(_MSC_VER) || __SIZEOF_INT128__ - template - uint_x4 operator*(const uint_x4&a, - const uint_x4&b) - { -#if defined(_MSC_VER) - uint64_t hi; - uint64_t lo = _umul128(a.d.v01, b.d.v01, &hi); -#else - __uint128_t r = __uint128_t(a.d.v01) * __uint128_t(b.d.v01); - uint64_t lo = uint64_t(r); - uint64_t hi = r >> 64; -#endif - hi += a.d.v23 * b.d.v01 + a.d.v01 * b.d.v23; - return { hi, lo }; - } -#endif -#endif - - - template - uint_x4 operator+(const uint_x4&a, - const uint_x4&b) - { - uint_x4 r = { 0U, 0U, 0U, 0U }; - - bool carryin = false; - bool carryout; - r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout); - - return r; - } - - template - uint_x4 operator-(const uint_x4&a, - const uint_x4&b) - { - uint_x4 r = { 0U, 0U, 0U, 0U }; - - bool carryin = false; - bool carryout; - r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout); - - return r; - } - -#if PCG_64BIT_SPECIALIZATIONS - template - uint_x4 operator+(const uint_x4&a, - const uint_x4&b) - { - uint_x4 r = { uint64_t(0u), uint64_t(0u) }; - - bool carryin = false; - bool carryout; - r.d.v01 = addwithcarry(a.d.v01, b.d.v01, carryin, &carryout); - carryin = carryout; - r.d.v23 = addwithcarry(a.d.v23, b.d.v23, carryin, &carryout); - - return r; - } - - template - uint_x4 operator-(const uint_x4&a, - const uint_x4&b) - { - uint_x4 r = { uint64_t(0u), uint64_t(0u) }; - - bool carryin = false; - bool carryout; - r.d.v01 = subwithcarry(a.d.v01, b.d.v01, carryin, &carryout); - carryin = carryout; - r.d.v23 = subwithcarry(a.d.v23, b.d.v23, carryin, &carryout); - - return r; - } -#endif - - template - uint_x4 operator&(const uint_x4&a, - const uint_x4&b) - { - return uint_x4(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01); - } - - template - uint_x4 operator|(const uint_x4&a, - const uint_x4&b) - { - return uint_x4(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01); - } - - template - uint_x4 operator^(const uint_x4&a, - const uint_x4&b) - { - return uint_x4(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01); - } - - template - uint_x4 operator~(const uint_x4&v) - { - return uint_x4(~v.d.v23, ~v.d.v01); - } - - template - uint_x4 operator-(const uint_x4&v) - { - return uint_x4(0UL, 0UL) - v; - } - - template - bool operator==(const uint_x4&a, const uint_x4&b) - { - return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23); - } - - template - bool operator!=(const uint_x4&a, const uint_x4&b) - { - return !operator==(a, b); - } - - - template - bool operator<(const uint_x4&a, const uint_x4&b) - { - return (a.d.v23 < b.d.v23) - || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01)); - } - - template - bool 
operator>(const uint_x4&a, const uint_x4&b) - { - return operator<(b, a); - } - - template - bool operator<=(const uint_x4&a, const uint_x4&b) - { - return !(operator<(b, a)); - } - - template - bool operator>=(const uint_x4&a, const uint_x4&b) - { - return !(operator<(a, b)); - } - - - - template - uint_x4 operator<<(const uint_x4&v, - const bitcount_t shift) - { - uint_x4 r = { 0U, 0U, 0U, 0U }; - const bitcount_t bits = uint_x4::UINT_BITS; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#endif - r.wa[out] = (v.wa[in] << shiftmod) | carryover; - carryover = (v.wa[in] >> (bits - shiftmod)); - } - } - else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; - } - - template - uint_x4 operator>>(const uint_x4&v, - const bitcount_t shift) - { - uint_x4 r = { 0U, 0U, 0U, 0U }; - const bitcount_t bits = uint_x4::UINT_BITS; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = (v.wa[in] >> shiftmod) | carryover; - carryover = (v.wa[in] << (bits - shiftmod)); - } - } - else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4 - shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; - } - -#if PCG_64BIT_SPECIALIZATIONS - template - uint_x4 operator<<(const uint_x4&v, - const bitcount_t shift) - { - constexpr bitcount_t bits2 = uint_x4::UINT_BITS * 2; - - if (shift >= bits2) { - return { v.d.v01 << (shift - bits2), uint64_t(0u) }; - } - else { - return { shift ? (v.d.v23 << shift) | (v.d.v01 >> (bits2 - shift)) - : v.d.v23, - v.d.v01 << shift }; - } - } - - template - uint_x4 operator>>(const uint_x4&v, - const bitcount_t shift) - { - constexpr bitcount_t bits2 = uint_x4::UINT_BITS * 2; - - if (shift >= bits2) { - return { uint64_t(0u), v.d.v23 >> (shift - bits2) }; - } - else { - return { v.d.v23 >> shift, - shift ? 
(v.d.v01 >> shift) | (v.d.v23 << (bits2 - shift)) - : v.d.v01 }; - } - } -#endif - - } // namespace pcg_extras - -#endif // PCG_UINT128_HPP_INCLUDED \ No newline at end of file diff --git a/include/settings.hpp b/include/settings.hpp index a93a1ae..b63f4d2 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -83,7 +83,7 @@ namespace parameters if (modules.restart_strategy == RestartStrategyType::BIPOP || modules.restart_strategy == RestartStrategyType::IPOP) modules.restart_strategy = RestartStrategyType::RESTART; } - volume = (this->ub - this->lb).prod(); + volume = (this->ub.cwiseMin(10 * sigma0) - this->lb.cwiseMax(-10 * sigma0)).prod(); } }; diff --git a/include/to_string.hpp b/include/to_string.hpp index 5315a95..277e039 100644 --- a/include/to_string.hpp +++ b/include/to_string.hpp @@ -111,6 +111,14 @@ namespace parameters { case MatrixAdaptationType::MATRIX: return "MATRIX"; + case MatrixAdaptationType::CHOLESKY: + return "CHOLESKY"; + case MatrixAdaptationType::ONEPLUSONE: + return "ONEPLUSONE"; + case MatrixAdaptationType::NONE: + return "NONE"; + case MatrixAdaptationType::SEPERABLE: + return "SEPERABLE"; default: case MatrixAdaptationType::COVARIANCE: return "COVARIANCE"; diff --git a/src/acmaes.cpp b/src/acmaes.cpp deleted file mode 100644 index e42f344..0000000 --- a/src/acmaes.cpp +++ /dev/null @@ -1,670 +0,0 @@ -// Copyright (c) Dietmar Wolz. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory. - -// Eigen based implementation of active CMA-ES - -// Supports parallel fitness function evaluation. -// -// For expensive objective functions (e.g. machine learning parameter optimization) use the workers -// parameter to parallelize objective function evaluation. The workers parameter should be limited -// the population size because otherwize poulation update is delayed. - -// Derived from http://cma.gforge.inria.fr/cmaes.m which follows -// https://www.researchgate.net/publication/227050324_The_CMA_Evolution_Strategy_A_Comparing_Review - -// Requires Eigen version >= 3.4 because new slicing capabilities are used, see -// https://eigen.tuxfamily.org/dox-devel/group__TutorialSlicingIndexing.html -// requires https://github.com/bab2min/EigenRand for random number generation. - - - -#include "acmaes.hpp" - -using namespace std; - -namespace acmaes { - - static ivec inverse(const ivec& indices) { - ivec inverse = ivec(indices.size()); - for (int i = 0; i < indices.size(); i++) - inverse(indices(i)) = i; - return inverse; - } - - static vec sequence(double start, double end, double step) { - int size = (int)((end - start) / step + 1); - vec d(size); - double value = start; - for (int r = 0; r < size; r++) { - d(r) = value; - value += step; - } - return d; - } - - AcmaesOptimizer::AcmaesOptimizer(long runid_, Fitness* fitfun_, int popsize_, int mu_, - const vec& guess_, const vec& inputSigma_, int maxEvaluations_, - double accuracy_, double stopfitness_, double stopTolHistFun_, - int update_gap_, long seed) { - // runid used for debugging / logging - runid = runid_; - // fitness function to minimize - fitfun = fitfun_; - // initial guess for the arguments of the fitness function - guess = guess_; - // accuracy = 1.0 is default, > 1.0 reduces accuracy - accuracy = accuracy_; - // number of objective variables/problem dimension - dim = guess_.size(); - // population size, offspring number. The primary strategy parameter to play - // with, which can be increased from its default value. 
Increasing the - // population size improves global search properties in exchange to speed. - // Speed decreases, as a rule, at most linearly with increasing population - // size. It is advisable to begin with the default small population size. - if (popsize_ > 0) - popsize = popsize_; - else - popsize = 4 + int(3. * log(dim)); - // individual sigma values - initial search volume. inputSigma determines - // the initial coordinate wise standard deviations for the search. Setting - // SIGMA one third of the initial search region is appropriate. - if (inputSigma_.size() == 1) - inputSigma = vec::Constant(dim, inputSigma_[0]); - else - inputSigma = inputSigma_; - // overall standard deviation - search volume. - sigma = inputSigma.maxCoeff(); - // termination criteria - // maximal number of evaluations allowed. - maxEvaluations = maxEvaluations_; - // limit for fitness value. - stopfitness = stopfitness_; - // stop if x-changes larger stopTolUpX. - stopTolUpX = 1e3 * sigma; - // stop if x-change smaller stopTolX. - stopTolX = 1e-11 * sigma * accuracy; - // stop if fun-changes smaller stopTolFun. - stopTolFun = 1e-12 * accuracy; - // stop if back fun-changes smaller stopTolHistFun. - stopTolHistFun = stopTolHistFun_ < 0 ? 1e-13 * accuracy : stopTolHistFun_; - // selection strategy parameters - // number of parents/points for recombination. - mu = mu_ > 0 ? mu_ : popsize / 2; - // array for weighted recombination. - weights = (log(sequence(1, mu, 1).array()) * -1.) + log(mu + 0.5); - double sumw = weights.sum(); - double sumwq = weights.squaredNorm(); - weights *= 1. / sumw; - // variance-effectiveness of sum w_i x_i. - mueff = sumw * sumw / sumwq; - - // dynamic strategy parameters and constants - // cumulation constant. - cc = (4. + mueff / dim) / (dim + 4. + 2. * mueff / dim); - // cumulation constant for step-size. - cs = (mueff + 2.) / (dim + mueff + 3.); - // damping for step-size. - damps = (1. + 2. * std::max(0., sqrt((mueff - 1.) / (dim + 1.)) - 1.)) - * max(0.3, - 1. - // modification for short runs - dim / (1e-6 + (maxEvaluations / popsize))) - + cs; // minor increment - // learning rate for rank-one update. - ccov1 = 2. / ((dim + 1.3) * (dim + 1.3) + mueff); - // learning rate for rank-mu update' - ccovmu = min(1. - ccov1, - 2. * (mueff - 2. + 1. / mueff) - / ((dim + 2.) * (dim + 2.) + mueff)); - // expectation of ||N(0,I)|| == norm(randn(N,1)). - chiN = sqrt(dim) * (1. - 1. / (4. * dim) + 1 / (21. * dim * dim)); - ccov1Sep = min(1., ccov1 * (dim + 1.5) / 3.); - ccovmuSep = min(1. - ccov1, ccovmu * (dim + 1.5) / 3.); - // lazy covariance update gap - lazy_update_gap = - update_gap_ >= 0 ? - update_gap_ : - 1.0 / (ccov1 + ccovmu + 1e-23) / dim / 10.0; - // CMA internal values - updated each generation - // objective variables. - xmean = fitfun->encode(guess); - // evolution path. - pc = zeros(dim); - // evolution path for sigma. - ps = zeros(dim); - // norm of ps, stored for efficiency. - normps = ps.norm(); - // coordinate system. - B = Eigen::MatrixXd::Identity(dim, dim); - // diagonal of sqrt(D), stored for efficiency. - diagD = inputSigma / sigma; - diagC = diagD.cwiseProduct(diagD); - // B*D, stored for efficiency. - BD = B.cwiseProduct(diagD.transpose().replicate(dim, 1)); - // covariance matrix. - C = B * (Eigen::MatrixXd::Identity(dim, dim) * B.transpose()); - // number of iterations. - iterations = 1; - // size of history queue of best values. - historySize = 10 + int(3. * 10. 
* dim / popsize); - // stop criteria - stop = 0; - // best value so far - bestValue = DBL_MAX; - // best parameters so far - bestX = guess; - // history queue of best values. - fitnessHistory = vec::Constant(historySize, DBL_MAX); - fitnessHistory(0) = bestValue; - rs = new pcg64(seed); - - computeArz = true; - fitness = vec(popsize); - arx = mat(dim, popsize); - n_updates = 0; - } - - AcmaesOptimizer::~AcmaesOptimizer() { - delete rs; - } - - // param zmean weighted row matrix of the gaussian random numbers generating the current offspring - // param xold xmean matrix of the previous generation - // return hsig flag indicating a small correction - - bool AcmaesOptimizer::updateEvolutionPaths(const vec& zmean, const vec& xold) { - ps = ps * (1. - cs) + ((B * zmean) * sqrt(cs * (2. - cs) * mueff)); - normps = ps.norm(); - bool hsig = normps / sqrt(1. - pow(1. - cs, 2. * iterations)) / chiN - < 1.4 + 2. / (dim + 1.); - pc *= (1. - cc); - if (hsig) - pc += (xmean - xold) * (sqrt(cc * (2. - cc) * mueff) / sigma); - return hsig; - } - - // param hsig flag indicating a small correction - // param bestArx fitness-sorted matrix of the argument vectors producing the current offspring - // param arz unsorted matrix containing the gaussian random values of the current offspring - // param arindex indices indicating the fitness-order of the current offspring - // param xold xmean matrix of the previous generation - - double AcmaesOptimizer::updateCovariance(bool hsig, const mat& bestArx, const mat& arz, - const ivec& arindex, const mat& xold) { - double negccov = 0; - if (ccov1 + ccovmu > 0) { - mat arpos = (bestArx - xold.replicate(1, mu)) * (1. / sigma); // mu difference vectors - mat roneu = pc * pc.transpose() * ccov1; - // minor correction if hsig==false - double oldFac = hsig ? 0 : ccov1 * cc * (2. - cc); - oldFac += 1. - ccov1 - ccovmu; - // Adapt covariance matrix C active CMA - negccov = (1. - ccovmu) * 0.25 * mueff - / (pow(dim + 2., 1.5) + 2. * mueff); - double negminresidualvariance = 0.66; - // keep at least 0.66 in all directions, small popsize are most critical - double negalphaold = 0.5; // where to make up for the variance loss, - // prepare vectors, compute negative updating matrix Cneg - ivec arReverseIndex = arindex.reverse(); - mat arzneg = arz(Eigen::indexing::all, arReverseIndex.head(mu)); - vec arnorms = arzneg.colwise().norm(); - ivec idxnorms = sort_index(arnorms); - vec arnormsSorted = arnorms(idxnorms); - ivec idxReverse = idxnorms.reverse(); - vec arnormsReverse = arnorms(idxReverse); - arnorms = arnormsReverse.cwiseQuotient(arnormsSorted); - vec arnormsInv = arnorms(inverse(idxnorms)); - mat sqarnw = arnormsInv.cwiseProduct(arnormsInv).transpose() - * weights; - double negcovMax = (1. - negminresidualvariance) / sqarnw(0); - if (negccov > negcovMax) - negccov = negcovMax; - arzneg = arzneg.cwiseProduct( - arnormsInv.transpose().replicate(dim, 1)); - mat artmp = BD * arzneg; - mat Cneg = artmp * weights.asDiagonal() * artmp.transpose(); - oldFac += negalphaold * negccov; - C = (C * oldFac) + roneu - + (arpos * (ccovmu + (1. - negalphaold) * negccov) - * weights.replicate(1, dim).cwiseProduct( - arpos.transpose())) - (Cneg * negccov); - } - return negccov; - } - - // Update B and diagD from C - // param negccov Negative covariance factor. - - void AcmaesOptimizer::updateBD(double negccov) { - - if (ccov1 + ccovmu + negccov > 0 - && (std::fmod(iterations, - 1. / (ccov1 + ccovmu + negccov) / dim / 10.)) < 1.) 
{ - // to achieve O(N^2) enforce symmetry to prevent complex numbers - mat triC = C.triangularView(); - mat triC1 = C.triangularView(); - C = triC + triC1.transpose(); - Eigen::SelfAdjointEigenSolver sades; - sades.compute(C); - // diagD defines the scaling - diagD = sades.eigenvalues(); - B = sades.eigenvectors(); - if (diagD.minCoeff() <= 0) { - for (int i = 0; i < dim; i++) - if (diagD(i, 0) < 0) - diagD(i, 0) = 0.; - double tfac = diagD.maxCoeff() / 1e14; - C += Eigen::MatrixXd::Identity(dim, dim) * tfac; - diagD += vec::Constant(dim, 1.0) * tfac; - } - if (diagD.maxCoeff() > 1e14 * diagD.minCoeff()) { - double tfac = diagD.maxCoeff() / 1e14 - diagD.minCoeff(); - C += Eigen::MatrixXd::Identity(dim, dim) * tfac; - diagD += vec::Constant(dim, 1.0) * tfac; - } - diagC = C.diagonal(); - diagD = diagD.cwiseSqrt(); // D contains standard deviations now - BD = B.cwiseProduct(diagD.transpose().replicate(dim, 1)); - - n_updates++; - - } - } - - mat AcmaesOptimizer::ask_all() { // undecoded - // generate popsize offspring. - arz = normal(dim, popsize, *rs); - mat xs(dim, popsize); - for (int k = 0; k < popsize; k++) { - vec delta = (BD * arz.col(k)) * sigma; - xs.col(k) = fitfun->getClosestFeasibleNormed(xmean + delta); - } - computeArz = false; - return xs; - } - - int AcmaesOptimizer::tell_all(mat ys, mat xs) { - told = 0; - for (int p = 0; p < popsize; p++) - tell(ys(p), xs.col(p)); - computeArz = true; - return stop; - } - - int AcmaesOptimizer::tell_all_asked(mat ys, mat xs) { - told = 0; - for (int p = 0; p < popsize; p++) - tell(ys(p), xs.col(p)); - computeArz = false; - return stop; - } - - mat AcmaesOptimizer::getPopulation() { - mat pop(dim, popsize); - for (int p = 0; p < popsize; p++) - pop.col(p) = fitfun->decode(fitfun->getClosestFeasibleNormed(popX.col(p))); - return pop; - } - - vec AcmaesOptimizer::ask() { - // ask for one new argument vector. - vec arz1 = normalVec(dim, *rs); - vec delta = (BD * arz1) * sigma; - vec arx1 = fitfun->getClosestFeasibleNormed(xmean + delta); - computeArz = true; - return arx1; - } - - int AcmaesOptimizer::tell(double y, const vec& x) { - //tell function value for a argument list retrieved by ask_one(). - fitness[told] = isfinite(y) ? y : DBL_MAX; - arx.col(told) = x; - told++; - if (told >= popsize) { - xmean = fitfun->getClosestFeasibleNormed(xmean); - if (computeArz) { - try { - arz = (BD.inverse() - * ((arx - xmean.replicate(1, popsize)) / sigma)); - } - catch (std::exception& e) { - arz = normal(dim, popsize, *rs); - } - } - updateCMA(); - told = 0; - iterations += 1; - } - return stop; - } - - void AcmaesOptimizer::updateCMA() { - // sort by fitness and compute weighted mean into xmean - ivec arindex = sort_index(fitness); - // calculate new xmean, this is selection and recombination - vec xold = xmean; // for speed up of Eq. (2) and (3) - ivec bestIndex = arindex.head(mu); - mat bestArx = arx(Eigen::indexing::all, bestIndex); - xmean = bestArx * weights; - mat bestArz = arz(Eigen::indexing::all, bestIndex); - mat zmean = bestArz * weights; - bool hsig = updateEvolutionPaths(zmean, xold); - // adapt step size sigma - sigma *= exp(min(1.0, (normps / chiN - 1.) 
* cs / damps)); - double bestFitness = fitness(arindex(0)); - double worstFitness = fitness(arindex(arindex.size() - 1)); - if (bestValue > bestFitness) { - bestValue = bestFitness; - bestX = fitfun->decode(bestArx.col(0)); - if (isfinite(stopfitness) && bestFitness < stopfitness) { - stop = 1; - return; - } - } - if (iterations >= last_update + lazy_update_gap) { - last_update = iterations; - double negccov = updateCovariance(hsig, bestArx, arz, arindex, - xold); - updateBD(negccov); - // handle termination criteria - vec sqrtDiagC = diagC.cwiseSqrt(); - vec pcCol = pc; - for (int i = 0; i < dim; i++) { - if (sigma * (max(abs(pcCol[i]), sqrtDiagC[i])) > stopTolX) - break; - if (i >= dim - 1) - stop = 2; - } - if (stop > 0) - return; - for (int i = 0; i < dim; i++) - if (sigma * sqrtDiagC[i] > stopTolUpX) - stop = 3; - if (stop > 0) - return; - } - double historyBest = fitnessHistory.minCoeff(); - double historyWorst = fitnessHistory.maxCoeff(); - if (iterations > 2 - && max(historyWorst, worstFitness) - - min(historyBest, bestFitness) < stopTolFun) { - stop = 4; - return; - } - if (iterations > fitnessHistory.size() - && historyWorst - historyBest < stopTolHistFun) { - stop = 5; - return; - } - // condition number of the covariance matrix exceeds 1e14 - if (diagD.maxCoeff() / diagD.minCoeff() > 1e7 * 1.0 / sqrt(accuracy)) { - stop = 6; - return; - } - // adjust step size in case of equal function values (flat fitness) - if (bestValue == fitness[arindex[(int)(0.1 + popsize / 4.)]]) { - sigma *= exp(0.2 + cs / damps); - } - if (iterations > 2 - && max(historyWorst, bestFitness) - - std::min(historyBest, bestFitness) == 0) { - sigma *= ::exp(0.2 + cs / damps); - } - // store best in history - for (int i = 1; i < fitnessHistory.size(); i++) - fitnessHistory[i] = fitnessHistory[i - 1]; - fitnessHistory[0] = bestFitness; - } - - int AcmaesOptimizer::doOptimize() { - - // -------------------- Generation Loop -------------------------------- - iterations = 0; - fitfun->resetEvaluations(); - while (fitfun->evaluations() < maxEvaluations && !fitfun->terminate()) { - // generate and evaluate popsize offspring - mat xs = ask_all(); - vec ys(popsize); - fitfun->values(xs, ys); // decodes - told = 0; - for (int k = 0; k < popsize && stop == 0; k++) - tell(ys(k), xs.col(k)); // tell encoded - if (stop != 0) - return fitfun->evaluations(); - } - return fitfun->evaluations(); - } - - int AcmaesOptimizer::do_optimize_delayed_update(int workers) { - //iterations = 0; - //fitfun->resetEvaluations(); - //evaluator eval(fitfun, 1, workers); - //vec evals_x(workers); - //// fill eval queue with initial population - //for (int i = 0; i < workers; i++) { - // vec x = ask(); - // vec xdec = fitfun->decode(x); - // eval.evaluate(xdec, i); - // evals_x[i] = x; // encoded - //} - //while (fitfun->evaluations() < maxEvaluations) { - // vec_id* vid = eval.result(); - // vec y = vec(vid->_v); - // int p = vid->_id; - // delete vid; - // vec x = evals_x[p]; - // tell(y(0), x); // tell evaluated encoded x - // if (fitfun->evaluations() >= maxEvaluations || stop != 0) - // break; - // x = ask(); - // eval.evaluate(x, p); - // evals_x[p] = x; - //} - //return fitfun->evaluations(); - return 0; - } - - vec AcmaesOptimizer::getBestX() { - return bestX; - } - - double AcmaesOptimizer::getBestValue() { - return bestValue; - } - - double AcmaesOptimizer::getIterations() { - return iterations; - } - - int AcmaesOptimizer::getStop() { - return stop; - } - - Fitness* AcmaesOptimizer::getFitfun() { - return fitfun; - } - - int 
AcmaesOptimizer::getDim() { - return dim; - } - - int AcmaesOptimizer::getPopsize() { - return popsize; - } - - Fitness* AcmaesOptimizer::getFitfunPar() { - return fitfun; - } -} - -/* -using namespace acmaes; - -extern "C" { - void optimizeACMA_C(long runid, callback_type func, callback_parallel func_par, int dim, - double* init, double* lower, double* upper, double* sigma, - int maxEvals, double stopfitness, double stopTolHistFun, int mu, int popsize, double accuracy, - long seed, bool normalize, bool use_delayed_update, int update_gap, int workers, double* res) { - - vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); - for (int i = 0; i < dim; i++) {// guess is mandatory - guess[i] = init[i]; - inputSigma[i] = sigma[i]; - } - if (lower != NULL && upper != NULL) { - for (int i = 0; i < dim; i++) { - guess[i] = init[i]; - lower_limit[i] = lower[i]; - upper_limit[i] = upper[i]; - } - } - else { - lower_limit.resize(0); - upper_limit.resize(0); - normalize = false; - } - - Fitness fitfun(func, func_par, dim, 1, lower_limit, upper_limit); - fitfun.setNormalize(normalize); - - AcmaesOptimizer opt(runid, &fitfun, popsize, mu, guess, inputSigma, - maxEvals, accuracy, stopfitness, stopTolHistFun, update_gap, seed); - try { - int evals = 0; - if (workers > 1 && use_delayed_update) - evals = opt.do_optimize_delayed_update(workers); - else - evals = opt.doOptimize(); - vec bestX = opt.getBestX(); - double bestY = opt.getBestValue(); - for (int i = 0; i < dim; i++) - res[i] = bestX[i]; - res[dim] = bestY; - res[dim + 1] = evals; - res[dim + 2] = opt.getIterations(); - res[dim + 3] = opt.getStop(); - } - catch (std::exception& e) { - cout << e.what() << endl; - } - } - - uintptr_t initACMA_C(long runid, int dim, - double* init, double* lower, double* upper, double* sigma, - int maxEvals, double stopfitness, double stopTolHistFun, int mu, int popsize, double accuracy, - long seed, bool normalize, bool use_delayed_update, int update_gap) { - - vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); - for (int i = 0; i < dim; i++) {// guess is mandatory - guess[i] = init[i]; - inputSigma[i] = sigma[i]; - } - if (lower != NULL && upper != NULL) { - for (int i = 0; i < dim; i++) { - guess[i] = init[i]; - lower_limit[i] = lower[i]; - upper_limit[i] = upper[i]; - } - } - else { - lower_limit.resize(0); - upper_limit.resize(0); - normalize = false; - } - - Fitness* fitfun = new Fitness(noop_callback, noop_callback_par, dim, 1, lower_limit, upper_limit); // never used here - fitfun->setNormalize(normalize); - - AcmaesOptimizer* opt = new AcmaesOptimizer(runid, fitfun, popsize, mu, guess, inputSigma, - maxEvals, accuracy, stopfitness, stopTolHistFun, update_gap, seed); - return (uintptr_t)opt; - } - - void destroyACMA_C(uintptr_t ptr) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - Fitness* fitfun = opt->getFitfun(); - delete fitfun; - delete opt; - } - - void askACMA_C(uintptr_t ptr, double* xs) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - int n = opt->getDim(); - int popsize = opt->getPopsize(); - opt->popX = opt->ask_all(); - Fitness* fitfun = opt->getFitfun(); - for (int p = 0; p < popsize; p++) { - vec x = fitfun->decode(opt->popX.col(p)); - for (int i = 0; i < n; i++) - xs[p * n + i] = x[i]; - } - } - - int tellACMA_C(uintptr_t ptr, double* ys) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - int popsize = opt->getPopsize(); - vec vals(popsize); - for (int i = 0; i < popsize; i++) - vals[i] = ys[i]; - opt->tell_all_asked(vals, opt->popX); - return 
opt->getStop(); - } - - int tellXACMA_C(uintptr_t ptr, double* ys, double* xs) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - int popsize = opt->getPopsize(); - int dim = opt->getDim(); - Fitness* fitfun = opt->getFitfun(); - opt->popX = mat(dim, popsize); - for (int p = 0; p < popsize; p++) { - vec x(dim); - for (int i = 0; i < dim; i++) - x[i] = xs[p * dim + i]; - opt->popX.col(p) = fitfun->encode(x); - } - vec vals(popsize); - for (int i = 0; i < popsize; i++) - vals[i] = ys[i]; - opt->tell_all(vals, opt->popX); - return opt->getStop(); - } - - int populationACMA_C(uintptr_t ptr, double* xs) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - int dim = opt->getDim(); - int popsize = opt->getPopsize(); - mat popX = opt->getPopulation(); - for (int p = 0; p < popsize; p++) { - vec x = popX.col(p); - for (int i = 0; i < dim; i++) - x[i] = xs[p * dim + i]; - } - return opt->getStop(); - } - - int resultACMA_C(uintptr_t ptr, double* res) { - AcmaesOptimizer* opt = (AcmaesOptimizer*)ptr; - vec bestX = opt->getBestX(); - double bestY = opt->getBestValue(); - int n = bestX.size(); - for (int i = 0; i < bestX.size(); i++) - res[i] = bestX[i]; - res[n] = bestY; - Fitness* fitfun = opt->getFitfun(); - res[n + 1] = fitfun->evaluations(); - res[n + 2] = opt->getIterations(); - res[n + 3] = opt->getStop(); - return opt->getStop(); - } - - int testACMA_C(int n, double* res) { - for (int i = 0; i < n; i++) { - cout << i << ": " << res[i] << endl; - res[i] = -res[i]; - } - return 7; - } -} - -*/ \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index e661943..6f692e9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,5 @@ #include "c_maes.hpp" -#include "acmaes.hpp" +#include "to_string.hpp" #include #include @@ -10,17 +10,17 @@ using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 100; -static bool rotated = false; -static size_t budget = dim * 1000; +static int dim = 50; +static bool rotated = true; +static size_t budget = dim * 4000; struct Ellipse { size_t evals = 0; - Matrix R; + Matrix R; Ellipse(const int dim, const bool rotated = false) : - R{ rotated ? functions::random_rotation_matrix(dim, 1): Matrix::Identity(dim, dim) } + R{ rotated ? 
functions::random_rotation_matrix(dim, 1) : Matrix::Identity(dim, dim) } { } @@ -50,21 +50,21 @@ struct Timer { const auto t2 = high_resolution_clock::now(); const auto ms_int = duration_cast(t2 - t1); - std::cout << "Time elapsed: " << static_cast(ms_int.count()) / 1000.0 << "s\n"; + std::cout << "Time elapsed: " << static_cast(ms_int.count()) / 1000.0 << "s\n\n"; } }; -void run_modcma() +void run_modcma(parameters::MatrixAdaptationType mat_t) { rng::set_seed(42); parameters::Modules m; - //m.matrix_adaptation = parameters::MatrixAdaptationType::MATRIX; - //m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; - m.bound_correction = parameters::CorrectionMethod::NONE; - - parameters::Settings settings(dim, m, -std::numeric_limits::infinity(), - std::nullopt, budget, 2.0); + m.matrix_adaptation = mat_t; + m.elitist = true; + parameters::Settings settings( + dim, m, -std::numeric_limits::infinity(), + std::nullopt, budget, 2.0 + ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); @@ -97,80 +97,16 @@ void run_modcma() // } - std::cout << "modcmaes\n" << std::defaultfloat; + std::cout << "modcmaes: " << parameters::to_string(mat_t) << "\n" << std::defaultfloat; std::cout << "evals: " << cma.p->stats.evaluations << std::endl; std::cout << "iters: " << cma.p->stats.t << std::endl; std::cout << "updates: " << cma.p->stats.n_updates << std::endl; - std::cout << "best_y: " << std::scientific << std::setprecision(3) << cma.p->stats.global_best.y << std::endl << std::endl; + std::cout << "best_y: " << std::scientific << std::setprecision(3) << cma.p->stats.global_best.y << std::endl; } -void run_acmaes() -{ - Timer t; - double sigma = 2.0; - bool normalize = false; - - vec guess(dim), lower_limit(dim), upper_limit(dim), inputSigma(dim); - for (int i = 0; i < dim; i++) { - guess[i] = 0.; - inputSigma[i] = sigma; - lower_limit[i] = -5; - upper_limit[i] = 5; - } - - - auto func_par = [](int popsize, int dim, double* x, double* y) { - static FunctionType f = Ellipse(dim, rotated); - //std::cout << "is this called\n"; - - for (int i = 0; i < popsize; i++) - { - auto map = Eigen::Map(x + i * dim, dim); - //std::cout << map.transpose() << std::endl; - y[i] = f(map); - } - }; - - - auto func = [](int popsize, const double* x, double* y) { - static FunctionType f = Ellipse(dim, rotated); - std::cout << "is this called\n"; - return true; - }; - - - - Fitness fitfun(func, func_par, dim, 1, lower_limit, upper_limit); - fitfun.setNormalize(normalize); - - int popsize = 4 + std::floor(3 * std::log((double)dim)); - int mu = popsize / 2; - long seed = 32; - constexpr double accuracy = -std::numeric_limits::infinity(); - constexpr double stop_fitness = -std::numeric_limits::infinity(); - double stopTolHistFun = 0; - int update_gap = -1; - - acmaes::AcmaesOptimizer opt(0, &fitfun, popsize, mu, guess, inputSigma, - budget, accuracy, stop_fitness, stopTolHistFun, update_gap, seed); - - int evals = 0; - - evals = opt.doOptimize(); - vec bestX = opt.getBestX(); - double bestY = opt.getBestValue(); - - std::cout << "acmaes\n" << std::defaultfloat; - std::cout << "evals: " << evals << std::endl; - std::cout << "iters: " << (int)opt.getIterations() << std::endl; - std::cout << "updates: " << opt.n_updates << std::endl; - std::cout << "best_y: " << std::scientific << std::setprecision(3) << bestY << std::endl << std::endl; - //std::cout << bestX.transpose() << std::endl; -} - - int main() { - run_modcma(); - run_acmaes(); + run_modcma(parameters::MatrixAdaptationType::CHOLESKY); + 
//run_modcma(parameters::MatrixAdaptationType::MATRIX); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 4fb328c..761fdcc 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -122,16 +122,13 @@ namespace matrix_adaptation void CovarianceAdaptation::restart(const Settings& settings) { + Adaptation::restart(settings); B = Matrix::Identity(settings.dim, settings.dim); C = Matrix::Identity(settings.dim, settings.dim); A = Matrix::Identity(settings.dim, settings.dim); inv_root_C = Matrix::Identity(settings.dim, settings.dim); d.setOnes(); - m = settings.x0.value_or(Vector::Zero(settings.dim)); - m_old.setZero(); - dm.setZero(); pc.setZero(); - ps.setZero(); } Vector CovarianceAdaptation::compute_y(const Vector& zi) @@ -141,8 +138,9 @@ namespace matrix_adaptation Vector CovarianceAdaptation::invert_y(const Vector& yi) { - if (!constants::calc_eigv) + if (!constants::calc_eigv) { return A.triangularView().solve(yi); + } return (B.transpose() * yi).cwiseQuotient(d); } @@ -174,6 +172,9 @@ namespace matrix_adaptation { return true; } + + stats.last_update = stats.t; + stats.n_updates++; return CovarianceAdaptation::adapt_matrix(w, m, pop, mu, settings, stats); } @@ -189,6 +190,9 @@ namespace matrix_adaptation bool MatrixAdaptation::adapt_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu, const Settings& settings, parameters::Stats& stats) { + stats.last_update = stats.t; + stats.n_updates++; + const auto old_m = (1. - (0.5 * w.c1) - (0.5 * w.cmu)) * M; const auto scaled_ps = (0.5 * w.c1) * (M * ps) * ps.transpose(); @@ -215,10 +219,7 @@ namespace matrix_adaptation void MatrixAdaptation::restart(const Settings& settings) { - ps.setOnes(); - m = settings.x0.value_or(Vector::Zero(settings.dim)); - m_old.setZero(); - dm.setZero(); + Adaptation::restart(settings); M = Matrix::Identity(settings.dim, settings.dim); M_inv = Matrix::Identity(settings.dim, settings.dim); } @@ -241,14 +242,6 @@ namespace matrix_adaptation ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * dz); } - void None::restart(const Settings& settings) - { - ps.setZero(); - m = settings.x0.value_or(Vector::Zero(settings.dim)); - m_old.setZero(); - dm.setZero(); - } - Vector None::compute_y(const Vector& zi) { return zi; @@ -259,4 +252,71 @@ namespace matrix_adaptation { return yi; } + + + Matrix CholeskyAdaptation::rank_one_update(const Matrix& A, const Float beta, Vector a) + { + const auto d = a.size(); + Float b = 1.0; + A_prime.setZero(); + + for (int j = 0; j < d; j++) + { + const Float aj2 = std::pow(a(j), 2); + const Float Ajj2 = std::pow(A(j, j), 2); + const Float gamma = Ajj2 * b + beta * aj2; + + A_prime(j, j) = std::sqrt(Ajj2 + (beta / b) * aj2); + + for (int k = j+1; k < d; k++) + { + a(k) -= a(j) / A(j, j) * A(k, j); + A_prime(k, j) = A_prime(j, j) / A(j, j) * A(k, j) + A_prime(j, j) * beta * a(j) / gamma * a(k); + } + b += beta * aj2 / Ajj2; + } + return A_prime; + } + + void CholeskyAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, + const std::shared_ptr& mutation, const parameters::Stats& stats, + size_t mu, size_t lambda) + { + pc = (1.0 - w.cc) * pc + (std::sqrt(w.cc * (2.0 - w.cc) * w.mueff)) * dm; + ps = (1.0 - mutation->cs)* ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * + A.triangularView().solve(dm)); + } + + bool CholeskyAdaptation::adapt_matrix(const 
parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) + { + + stats.last_update = stats.t; + stats.n_updates++; + + A *= std::sqrt(1 - w.c1 - w.cmu); + A = rank_one_update(A, w.c1, pc); + for (auto i = 0; i < mu; i++) + A = rank_one_update(A, w.cmu * w.positive(i), pop.Y.col(i)); + return true; + } + + void CholeskyAdaptation::restart(const parameters::Settings& settings) + { + Adaptation::restart(settings); + A = Matrix::Identity(settings.dim, settings.dim); + } + + Vector CholeskyAdaptation::compute_y(const Vector& zi) + { + return A * zi; + } + + Vector CholeskyAdaptation::invert_y(const Vector& yi) + { + //TODO: check is this correct + return A.triangularView().solve(yi); + } + + } From a41bc59951d9796c39cda27136f5d745ef5160c6 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Sat, 31 May 2025 12:44:53 +0200 Subject: [PATCH 12/74] fixed seperable --- include/matrix_adaptation.hpp | 28 ++++++++++++++-- src/main.cpp | 50 +++++++++++----------------- src/matrix_adaptation.cpp | 62 +++++++++++++++++++++++++++++++++-- 3 files changed, 103 insertions(+), 37 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index a7f38a9..6279637 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -109,11 +109,33 @@ namespace matrix_adaptation Vector invert_y(const Vector&) override; }; - struct SeperableAdaptation : CovarianceAdaptation + struct SeperableAdaptation : Adaptation { - using CovarianceAdaptation::CovarianceAdaptation; + Vector pc, d, c; + bool hs; + // B remains I + + + SeperableAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), + pc(Vector::Zero(dim)), + d(Vector::Ones(dim)), + c(Vector::Ones(dim)), + hs(true) + { + } + + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, + const std::shared_ptr& mutation, const parameters::Stats& stats, + size_t mu, size_t lambda) override; - bool perform_eigendecomposition(const parameters::Settings& settings) override; + bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) override; + + void restart(const parameters::Settings& settings) override; + + Vector compute_y(const Vector&) override; + + Vector invert_y(const Vector&) override; }; diff --git a/src/main.cpp b/src/main.cpp index 6f692e9..b08c43d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,9 +10,9 @@ using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 50; -static bool rotated = true; -static size_t budget = dim * 4000; +static int dim = 100; +static bool rotated = false; +static size_t budget = dim * 5000; struct Ellipse { @@ -28,7 +28,7 @@ struct Ellipse { evals++; const auto x_shift = R * (x.array() - 1.).matrix(); - return functions::rosenbrock(x_shift); + return functions::ellipse(x_shift); } }; @@ -60,10 +60,16 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = true; + m.elitist = false; + m.active = true; + parameters::Settings settings( - dim, m, -std::numeric_limits::infinity(), - std::nullopt, budget, 2.0 + dim, + m, + -std::numeric_limits::infinity(), + std::nullopt, + budget, + 2.0 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); @@ 
-72,29 +78,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) FunctionType f = Ellipse(dim, rotated); while (cma.step(f)) { - //std::cout << cma.p->stats << std::endl; - //std::cout << cma.p->mutation->sigma << std::endl; - //auto sr = std::dynamic_pointer_cast(cma.p->mutation); - //std::cout << "p_succ: " << sr->success_ratio << ", " << sr->max_success_ratio << std::endl; - - //if (cma.p->stats.current_best.y < 1e-8) - // break; - - // No rotation - // e: Stats t=549 e=5490 - // no-e: Stats t=594 e=5940 - // Rotation - // e: Stats t = 559 e = 5590 - // no-e: Stats t=549 e=5490 - - // Rosen - // no rotation - // e: Stats t = 617 e = 6170 - // noe: Stats t=625 e=6250 - // rotation: - // e: Stats t=618 e=6180 - // no-e Stats t=568 e=5680 - // + if (cma.p->stats.global_best.y < 1e-8) + break; } std::cout << "modcmaes: " << parameters::to_string(mat_t) << "\n" << std::defaultfloat; @@ -102,11 +87,14 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) std::cout << "iters: " << cma.p->stats.t << std::endl; std::cout << "updates: " << cma.p->stats.n_updates << std::endl; std::cout << "best_y: " << std::scientific << std::setprecision(3) << cma.p->stats.global_best.y << std::endl; + std::cout << "solved: " << std::boolalpha << (cma.p->stats.global_best.y < 1e-8) << std::endl; } int main() { - run_modcma(parameters::MatrixAdaptationType::CHOLESKY); + //run_modcma(parameters::MatrixAdaptationType::NONE); + run_modcma(parameters::MatrixAdaptationType::SEPERABLE); //run_modcma(parameters::MatrixAdaptationType::MATRIX); + run_modcma(parameters::MatrixAdaptationType::CHOLESKY); run_modcma(parameters::MatrixAdaptationType::COVARIANCE); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 761fdcc..7b0c94f 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -145,10 +145,66 @@ namespace matrix_adaptation return (B.transpose() * yi).cwiseQuotient(d); } - bool SeperableAdaptation::perform_eigendecomposition(const Settings& settings) + + void SeperableAdaptation::adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const std::shared_ptr& mutation, + const parameters::Stats& stats, + size_t mu, size_t lambda) + { + ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * sqrt(w.mueff)) * dz); + + const Float actual_ps_length = ps.norm() / sqrt( + 1.0 - pow(1.0 - mutation->cs, 2.0 * (stats.evaluations / lambda))); + + const Float expected_ps_length = (1.4 + (2.0 / (dd + 1.0))) * expected_length_z; + + hs = actual_ps_length < expected_ps_length; + pc = (1.0 - w.cc) * pc + (hs * sqrt(w.cc * (2.0 - w.cc) * w.mueff)) * dm; + } + + bool SeperableAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) + { + + stats.last_update = stats.t; + stats.n_updates++; + + const auto dhs = (1 - hs) * w.cc * (2.0 - w.cc); + const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * w.positive.sum())); + + for (auto j = 0; j < settings.dim; j++) + { + auto rank_mu = (pop.Z.leftCols(mu).row(j).array().pow(2) * w.positive.transpose().array() * c(j)).sum(); + + if (m.active) + rank_mu += (pop.Z.rightCols(pop.Z.cols() - mu).row(j).array().pow(2) * w.negative.transpose().array() * c(j)).sum(); + + + c(j) = decay_c * c(j) + w.c1 * pow(pc(j), 2) + w.cmu * rank_mu; + d(j) = std::sqrt(c(j)); + } + + + return true; + } + + void SeperableAdaptation::restart(const 
parameters::Settings& settings) { + Adaptation::restart(settings); + c.setOnes(); + d.setOnes(); + pc.setZero(); + } + + Vector SeperableAdaptation::compute_y(const Vector& zi) + { + return d.array() * zi.array(); + } + + Vector SeperableAdaptation::invert_y(const Vector& yi) { - d = C.diagonal().cwiseSqrt(); - return d.minCoeff() > 0.0; + return yi.array() / d.array(); } From 9dfe8324d3813a10f0b0e5f29bc661ce3aabe1eb Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Sat, 31 May 2025 12:50:44 +0200 Subject: [PATCH 13/74] active for cholesky --- include/matrix_adaptation.hpp | 2 -- src/main.cpp | 10 +++++----- src/matrix_adaptation.cpp | 9 ++++++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 6279637..85b94ea 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -113,8 +113,6 @@ namespace matrix_adaptation { Vector pc, d, c; bool hs; - // B remains I - SeperableAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), pc(Vector::Zero(dim)), diff --git a/src/main.cpp b/src/main.cpp index b08c43d..80226ef 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 100; +static int dim = 50; static bool rotated = false; static size_t budget = dim * 5000; @@ -60,8 +60,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = false; - m.active = true; + m.elitist = true; + m.active = false; parameters::Settings settings( dim, @@ -93,8 +93,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) int main() { //run_modcma(parameters::MatrixAdaptationType::NONE); - run_modcma(parameters::MatrixAdaptationType::SEPERABLE); + //run_modcma(parameters::MatrixAdaptationType::SEPERABLE); //run_modcma(parameters::MatrixAdaptationType::MATRIX); run_modcma(parameters::MatrixAdaptationType::CHOLESKY); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 7b0c94f..2a512e3 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -181,7 +181,6 @@ namespace matrix_adaptation if (m.active) rank_mu += (pop.Z.rightCols(pop.Z.cols() - mu).row(j).array().pow(2) * w.negative.transpose().array() * c(j)).sum(); - c(j) = decay_c * c(j) + w.c1 * pow(pc(j), 2) + w.cmu * rank_mu; d(j) = std::sqrt(c(j)); } @@ -352,8 +351,13 @@ namespace matrix_adaptation A *= std::sqrt(1 - w.c1 - w.cmu); A = rank_one_update(A, w.c1, pc); - for (auto i = 0; i < mu; i++) + for (auto i = 0; i < mu; i++) A = rank_one_update(A, w.cmu * w.positive(i), pop.Y.col(i)); + + if (m.active) + for (auto i = 0; i < pop.Y.cols() - mu; i++) + A = rank_one_update(A, w.cmu * w.negative(i), pop.Y.col(mu + i)); + return true; } @@ -370,7 +374,6 @@ namespace matrix_adaptation Vector CholeskyAdaptation::invert_y(const Vector& yi) { - //TODO: check is this correct return A.triangularView().solve(yi); } From 3f9f8055983a05469815a8f34bb688aa559651e2 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Sun, 1 Jun 2025 16:06:31 +0200 Subject: [PATCH 14/74] fixed other matrix adap, move all weights to weights --- include/common.hpp | 41 + include/matrix_adaptation.hpp | 25 +- include/mutation.hpp | 317 +++-- 
include/parameters.hpp | 3 +- include/weights.hpp | 8 +- src/common.cpp | 2 + src/interface.cpp | 2117 ++++++++++++++++----------------- src/main.cpp | 61 +- src/matrix_adaptation.cpp | 154 +-- src/mutation.cpp | 126 +- src/parameters.cpp | 82 +- src/restart_criteria.cpp | 4 +- src/weights.cpp | 59 +- 13 files changed, 1516 insertions(+), 1483 deletions(-) diff --git a/include/common.hpp b/include/common.hpp index 9077789..e2bf6dc 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -341,4 +341,45 @@ namespace functions Float ellipse(const Vector& x); Float rosenbrock(const Vector& x); Matrix random_rotation_matrix(int n, int seed); + + enum ObjectiveFunction { + ELLIPSE, + ROSENBROCK, + SPHERE, + RASTRIGIN + }; + + inline FunctionType get(const ObjectiveFunction f) + { + switch (f) + { + case ELLIPSE: + return ellipse; + case RASTRIGIN: + return rastrigin; + case ROSENBROCK: + return rosenbrock; + case SPHERE: + return sphere; + default: + return sphere; + } + } + + inline std::string to_string(const ObjectiveFunction f) + { + switch (f) + { + case ELLIPSE: + return "ellipse"; + case RASTRIGIN: + return "rastrigin"; + case ROSENBROCK: + return "rosenbrock"; + case SPHERE: + return "sphere"; + default: + return "unknown"; + } + } } diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 85b94ea..20f14c7 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -22,11 +22,9 @@ namespace matrix_adaptation } void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda); virtual void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) = 0; virtual bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, @@ -61,8 +59,10 @@ namespace matrix_adaptation return true; } - void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + void adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -96,7 +96,7 @@ namespace matrix_adaptation virtual bool perform_eigendecomposition(const parameters::Settings& settings); void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, @@ -123,7 +123,7 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, @@ -144,7 +144,7 @@ namespace matrix_adaptation using CovarianceAdaptation::CovarianceAdaptation; void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, 
const parameters::Modules& m, const Population& pop, size_t mu, @@ -165,7 +165,7 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, @@ -191,9 +191,12 @@ namespace matrix_adaptation { } - void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, - size_t mu, size_t lambda) override; + void adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + size_t mu, size_t lambda + ) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; diff --git a/include/mutation.hpp b/include/mutation.hpp index 876cf3d..e081ba9 100644 --- a/include/mutation.hpp +++ b/include/mutation.hpp @@ -7,191 +7,188 @@ namespace parameters { - struct Stats; - struct Parameters; - struct Weights; - struct Strategy; - struct Modules; + struct Stats; + struct Parameters; + struct Weights; + struct Strategy; + struct Modules; } namespace matrix_adaptation { - struct Adaptation; + struct Adaptation; } namespace bounds { - struct BoundCorrection; + struct BoundCorrection; } namespace mutation { - struct ThresholdConvergence - { - Float init_threshold = 0.1; - Float decay_factor = 0.995; - virtual Vector scale(const Vector &zi, const Float diameter, const size_t budget, const size_t evaluations); - }; - - struct NoThresholdConvergence : ThresholdConvergence - { - Vector scale(const Vector &zi, const Float diameter, const size_t budget, const size_t evaluations) override - { - return zi; - } - }; - - class SequentialSelection - { - Float seq_cutoff_factor; - size_t seq_cutoff; - - public: - SequentialSelection(const parameters::Mirror &m, const size_t mu, const Float seq_cutoff_factor = 1.0) : seq_cutoff_factor(m == parameters::Mirror::PAIRWISE ? 
std::max(Float{2.}, seq_cutoff_factor) : seq_cutoff_factor), - seq_cutoff(static_cast(mu * seq_cutoff_factor)) - { - } - virtual bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror &m); - }; - - struct NoSequentialSelection : SequentialSelection - { - - using SequentialSelection::SequentialSelection; - - bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror &m) override { return false; } - }; - - struct SigmaSampler - { - Float beta; - - SigmaSampler(const Float d) : beta(std::log(2.0) / std::max((std::sqrt(d) * std::log(d)), Float{1.0})) {} - - virtual void sample(const Float sigma, Population &pop) const - { - pop.s = sampling::Random>(pop.s.size(), - std::lognormal_distribution<>(std::log(sigma), beta))(); - } - }; - - struct NoSigmaSampler : SigmaSampler - { - using SigmaSampler::SigmaSampler; - - void sample(const Float sigma, Population &pop) const override - { - pop.s.setConstant(sigma); - } - }; - - struct Strategy - { - std::shared_ptr tc; - std::shared_ptr sq; - std::shared_ptr ss; - Float cs; - Float sigma; - Float s = 0; - - Strategy( - const std::shared_ptr &threshold_covergence, - const std::shared_ptr &sequential_selection, - const std::shared_ptr &sigma_sampler, - const Float cs, const Float sigma0) : tc(threshold_covergence), sq(sequential_selection), ss(sigma_sampler), cs(cs), sigma(sigma0) {} - - virtual void mutate(FunctionType &objective, const size_t n_offspring, parameters::Parameters &p) = 0; - - virtual void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) = 0; - }; - - struct CSA : Strategy - { - Float damps; - Float expected_length_z; - - CSA(const std::shared_ptr &threshold_covergence, - const std::shared_ptr &sequential_selection, - const std::shared_ptr &sigma_sampler, - const Float cs, const Float damps, const Float sigma0, const Float expected_z) : Strategy(threshold_covergence, sequential_selection, sigma_sampler, cs, sigma0), damps(damps), expected_length_z(expected_z) {} - - void mutate(FunctionType &objective, const size_t n_offspring, parameters::Parameters &p) override; - - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; - - struct TPA : CSA - { - using CSA::CSA; - - Float a_tpa = 0.5; - Float b_tpa = 0.0; - Float rank_tpa = 0.0; - - void mutate(FunctionType &objective, const size_t n_offspring, parameters::Parameters &p) override; - - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; - - struct MSR : CSA - { - using CSA::CSA; + struct ThresholdConvergence + { + Float init_threshold = 0.1; + Float decay_factor = 0.995; + virtual Vector scale(const Vector& zi, const Float diameter, const size_t budget, const size_t evaluations); + }; + + struct NoThresholdConvergence : ThresholdConvergence + { + Vector scale(const Vector& zi, const Float diameter, const size_t budget, const size_t evaluations) override + { + return zi; + } + }; + + class SequentialSelection + { + Float seq_cutoff_factor; + size_t seq_cutoff; + + public: + SequentialSelection(const parameters::Mirror& m, const size_t mu, const Float seq_cutoff_factor = 1.0) : seq_cutoff_factor(m == parameters::Mirror::PAIRWISE ? std::max(Float{ 2. 
}, seq_cutoff_factor) : seq_cutoff_factor), + seq_cutoff(static_cast(mu* seq_cutoff_factor)) + { + } + virtual bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror& m); + }; + + struct NoSequentialSelection : SequentialSelection + { + + using SequentialSelection::SequentialSelection; + + bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror& m) override { return false; } + }; + + struct SigmaSampler + { + Float beta; + + SigmaSampler(const Float d) : beta(std::log(2.0) / std::max((std::sqrt(d) * std::log(d)), Float{ 1.0 })) {} + + virtual void sample(const Float sigma, Population& pop) const + { + pop.s = sampling::Random>(pop.s.size(), + std::lognormal_distribution<>(std::log(sigma), beta))(); + } + }; + + struct NoSigmaSampler : SigmaSampler + { + using SigmaSampler::SigmaSampler; + + void sample(const Float sigma, Population& pop) const override + { + pop.s.setConstant(sigma); + } + }; + + struct Strategy + { + std::shared_ptr tc; + std::shared_ptr sq; + std::shared_ptr ss; + Float sigma; + Float s = 0; - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; + Strategy( + const std::shared_ptr& threshold_covergence, + const std::shared_ptr& sequential_selection, + const std::shared_ptr& sigma_sampler, + const Float sigma0) : tc(threshold_covergence), sq(sequential_selection), ss(sigma_sampler), sigma(sigma0) { + } - struct PSR : CSA - { - Float success_ratio = .25; + virtual void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) = 0; - Vector combined; + virtual void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) = 0; + }; - using CSA::CSA; + struct CSA : Strategy + { + CSA(const std::shared_ptr& threshold_covergence, + const std::shared_ptr& sequential_selection, + const std::shared_ptr& sigma_sampler, + const Float sigma0) : Strategy(threshold_covergence, sequential_selection, sigma_sampler, sigma0) { + } - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; + void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) override; - struct XNES : CSA - { - using CSA::CSA; + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; + struct TPA : CSA + { + using CSA::CSA; - struct MXNES : CSA - { - using CSA::CSA; + Float a_tpa = 0.5; + Float b_tpa = 0.0; + Float rank_tpa = 0.0; + + void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) override; - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; - struct LPXNES : CSA - { - using 
CSA::CSA; + struct MSR : CSA + { + using CSA::CSA; - void adapt(const parameters::Weights &w, std::shared_ptr adaptation, Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) override; - }; + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + struct PSR : CSA + { + Float success_ratio = .25; - struct SR : CSA - { - constexpr static Float tgt_success_ratio = 2.0 / 11.0; - - using CSA::CSA; + Vector combined; - void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, - const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; - }; - + using CSA::CSA; - std::shared_ptr get(const parameters::Modules &m, const size_t mu, - const Float mueff, const Float d, const Float sigma, const std::optional cs, const Float expected_z); + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + struct XNES : CSA + { + using CSA::CSA; + + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + struct MXNES : CSA + { + using CSA::CSA; + + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + struct LPXNES : CSA + { + using CSA::CSA; + + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + + struct SR : CSA + { + constexpr static Float tgt_success_ratio = 2.0 / 11.0; + + using CSA::CSA; + + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + + std::shared_ptr get(const parameters::Modules& m, const size_t mu, const Float d, const Float sigma); } \ No newline at end of file diff --git a/include/parameters.hpp b/include/parameters.hpp index 1f4d0a5..fd625db 100644 --- a/include/parameters.hpp +++ b/include/parameters.hpp @@ -24,13 +24,14 @@ namespace parameters Settings settings; Stats stats; + std::shared_ptr sampler; + Weights weights; Population pop; Population old_pop; restart::Criteria criteria; - std::shared_ptr sampler; std::shared_ptr adaptation; std::shared_ptr mutation; std::shared_ptr selection; diff --git a/include/weights.hpp b/include/weights.hpp index 1b33f47..c770b35 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -12,11 +12,15 @@ namespace parameters Vector negative; Float mueff, mueff_neg; - Float c1, cmu, cc; + Float c1, cmu, cc, cs; + Float damps; + Float sqrt_cc_mueff, sqrt_cs_mueff; Float lazy_update_interval; Float sigma_path_scale; + Float expected_length_z; + Float expected_length_ps; - Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings); + Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings, const Float expected_length_z); void weights_default(const size_t lambda); diff --git a/src/common.cpp b/src/common.cpp index f97b69b..d520620 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -201,4 +201,6 @@ namespace 
functions return Q; } + + } diff --git a/src/interface.cpp b/src/interface.cpp index 8cebeac..b228d43 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -15,1165 +15,1118 @@ namespace py = pybind11; template Float random_double() { - static RNG gen; - return gen(rng::GENERATOR); + static RNG gen; + return gen(rng::GENERATOR); } -void define_options(py::module &main) +void define_options(py::module& main) { - auto m = main.def_submodule("options"); - using namespace parameters; - py::enum_(m, "RecombinationWeights") - .value("DEFAULT", parameters::RecombinationWeights::DEFAULT) - .value("EQUAL", parameters::RecombinationWeights::EQUAL) - .value("HALF_POWER_LAMBDA", parameters::RecombinationWeights::HALF_POWER_LAMBDA) - .export_values(); - - py::enum_(m, "BaseSampler") - .value("UNIFORM", BaseSampler::UNIFORM) - .value("SOBOL", BaseSampler::SOBOL) - .value("HALTON", BaseSampler::HALTON) - .export_values(); - - py::enum_(m, "SampleTranformerType") - .value("NONE", SampleTranformerType::NONE) - .value("GAUSSIAN", SampleTranformerType::GAUSSIAN) - .value("SCALED_UNIFORM", SampleTranformerType::SCALED_UNIFORM) - .value("LAPLACE", SampleTranformerType::LAPLACE) - .value("LOGISTIC", SampleTranformerType::LOGISTIC) - .value("CAUCHY", SampleTranformerType::CAUCHY) - .value("DOUBLE_WEIBULL", SampleTranformerType::DOUBLE_WEIBULL) - .export_values(); - - py::enum_(m, "Mirror") - .value("NONE", Mirror::NONE) - .value("MIRRORED", Mirror::MIRRORED) - .value("PAIRWISE", Mirror::PAIRWISE) - .export_values(); - - py::enum_(m, "StepSizeAdaptation") - .value("CSA", StepSizeAdaptation::CSA) - .value("TPA", StepSizeAdaptation::TPA) - .value("MSR", StepSizeAdaptation::MSR) - .value("XNES", StepSizeAdaptation::XNES) - .value("MXNES", StepSizeAdaptation::MXNES) - .value("LPXNES", StepSizeAdaptation::LPXNES) - .value("PSR", StepSizeAdaptation::PSR) - .value("SR", StepSizeAdaptation::PSR) - .export_values(); - - py::enum_(m, "CorrectionMethod") - .value("NONE", CorrectionMethod::NONE) - .value("MIRROR", CorrectionMethod::MIRROR) - .value("COTN", CorrectionMethod::COTN) - .value("UNIFORM_RESAMPLE", CorrectionMethod::UNIFORM_RESAMPLE) - .value("SATURATE", CorrectionMethod::SATURATE) - .value("TOROIDAL", CorrectionMethod::TOROIDAL) - .value("RESAMPLE", CorrectionMethod::RESAMPLE) - .export_values(); - - py::enum_(m, "RestartStrategy") - .value("NONE", RestartStrategyType::NONE) - .value("STOP", RestartStrategyType::STOP) - .value("RESTART", RestartStrategyType::RESTART) - .value("IPOP", RestartStrategyType::IPOP) - .value("BIPOP", RestartStrategyType::BIPOP) - .export_values(); - - py::enum_(m, "MatrixAdaptationType") - .value("COVARIANCE", MatrixAdaptationType::COVARIANCE) - .value("NONE", MatrixAdaptationType::NONE) - .value("MATRIX", MatrixAdaptationType::MATRIX) - .value("SEPERABLE", MatrixAdaptationType::SEPERABLE) - .value("ONEPLUSONE", MatrixAdaptationType::ONEPLUSONE) - .export_values(); - - py::enum_(m, "CenterPlacement") - .value("X0", CenterPlacement::X0) - .value("ZERO", CenterPlacement::ZERO) - .value("UNIFORM", CenterPlacement::UNIFORM) - .export_values(); + auto m = main.def_submodule("options"); + using namespace parameters; + py::enum_(m, "RecombinationWeights") + .value("DEFAULT", parameters::RecombinationWeights::DEFAULT) + .value("EQUAL", parameters::RecombinationWeights::EQUAL) + .value("HALF_POWER_LAMBDA", parameters::RecombinationWeights::HALF_POWER_LAMBDA) + .export_values(); + + py::enum_(m, "BaseSampler") + .value("UNIFORM", BaseSampler::UNIFORM) + .value("SOBOL", BaseSampler::SOBOL) + 
.value("HALTON", BaseSampler::HALTON) + .export_values(); + + py::enum_(m, "SampleTranformerType") + .value("NONE", SampleTranformerType::NONE) + .value("GAUSSIAN", SampleTranformerType::GAUSSIAN) + .value("SCALED_UNIFORM", SampleTranformerType::SCALED_UNIFORM) + .value("LAPLACE", SampleTranformerType::LAPLACE) + .value("LOGISTIC", SampleTranformerType::LOGISTIC) + .value("CAUCHY", SampleTranformerType::CAUCHY) + .value("DOUBLE_WEIBULL", SampleTranformerType::DOUBLE_WEIBULL) + .export_values(); + + py::enum_(m, "Mirror") + .value("NONE", Mirror::NONE) + .value("MIRRORED", Mirror::MIRRORED) + .value("PAIRWISE", Mirror::PAIRWISE) + .export_values(); + + py::enum_(m, "StepSizeAdaptation") + .value("CSA", StepSizeAdaptation::CSA) + .value("TPA", StepSizeAdaptation::TPA) + .value("MSR", StepSizeAdaptation::MSR) + .value("XNES", StepSizeAdaptation::XNES) + .value("MXNES", StepSizeAdaptation::MXNES) + .value("LPXNES", StepSizeAdaptation::LPXNES) + .value("PSR", StepSizeAdaptation::PSR) + .value("SR", StepSizeAdaptation::PSR) + .export_values(); + + py::enum_(m, "CorrectionMethod") + .value("NONE", CorrectionMethod::NONE) + .value("MIRROR", CorrectionMethod::MIRROR) + .value("COTN", CorrectionMethod::COTN) + .value("UNIFORM_RESAMPLE", CorrectionMethod::UNIFORM_RESAMPLE) + .value("SATURATE", CorrectionMethod::SATURATE) + .value("TOROIDAL", CorrectionMethod::TOROIDAL) + .value("RESAMPLE", CorrectionMethod::RESAMPLE) + .export_values(); + + py::enum_(m, "RestartStrategy") + .value("NONE", RestartStrategyType::NONE) + .value("STOP", RestartStrategyType::STOP) + .value("RESTART", RestartStrategyType::RESTART) + .value("IPOP", RestartStrategyType::IPOP) + .value("BIPOP", RestartStrategyType::BIPOP) + .export_values(); + + py::enum_(m, "MatrixAdaptationType") + .value("COVARIANCE", MatrixAdaptationType::COVARIANCE) + .value("NONE", MatrixAdaptationType::NONE) + .value("MATRIX", MatrixAdaptationType::MATRIX) + .value("SEPERABLE", MatrixAdaptationType::SEPERABLE) + .value("ONEPLUSONE", MatrixAdaptationType::ONEPLUSONE) + .value("CHOLESKSY", MatrixAdaptationType::CHOLESKSY) + .export_values(); + + py::enum_(m, "CenterPlacement") + .value("X0", CenterPlacement::X0) + .value("ZERO", CenterPlacement::ZERO) + .value("UNIFORM", CenterPlacement::UNIFORM) + .export_values(); } struct PySampler : sampling::Sampler { - std::function func; + std::function func; - PySampler(size_t d, std::function f) : Sampler::Sampler(d), func(f) {} + PySampler(size_t d, std::function f) : Sampler::Sampler(d), func(f) {} - Vector operator()() override - { - Vector res(d); - for (size_t j = 0; j < d; ++j) - res(j) = func(); - return res; - }; + Vector operator()() override + { + Vector res(d); + for (size_t j = 0; j < d; ++j) + res(j) = func(); + return res; + }; }; -void define_samplers(py::module &main) +void define_samplers(py::module& main) { - using namespace sampling; - - auto m = main.def_submodule("sampling"); - - py::class_>(m, "Sampler") - .def_readonly("d", &Sampler::d) - .def("reset", &Sampler::reset) - .def("expected_length", &Sampler::expected_length); - - py::class_>(m, "PySampler") - .def(py::init>(), py::arg("d"), py::arg("function")) - .def("__call__", &PySampler::operator()); - - py::class_>(m, "Gaussian") - .def(py::init(), py::arg("d")) - .def("__call__", &Gaussian::operator()); - - py::class_>(m, "Uniform") - .def(py::init(), py::arg("d")) - .def("__call__", &Uniform::operator()); - - py::class_>(m, "Sobol") - .def(py::init(), py::arg("d")) - .def_readonly("cache", &Sobol::cache) - .def("__call__", 
&Sobol::operator()); - - py::class_>(m, "Halton") - .def(py::init(), py::arg("d"), py::arg("scrambled") = true) - .def("__call__", &Halton::operator()); - - py::class_>(m, "Mirrored") - .def(py::init>(), py::arg("sampler")) - .def("__call__", &Mirrored::operator()); - - py::class_>(m, "CachedSampler") - .def(py::init>(), py::arg("sampler")) - .def(py::init, bool>(), py::arg("cache"), py::arg("transform_ppf") = false) - .def("__call__", &CachedSampler::operator()) - .def_readonly("index", &CachedSampler::index) - .def_readonly("n_samples", &CachedSampler::n_samples) - .def_readonly("cache", &CachedSampler::cache); - - py::class_>(m, "Orthogonal") - .def(py::init, size_t>(), - py::arg("sampler"), py::arg("n_samples")) - .def("__call__", &Orthogonal::operator()); - - py::class_>(m, "SampleTransformer") - .def("raw", &SampleTransformer::raw); - - py::class_>(m, "IdentityTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &IdentityTransformer::transform) - .def("__call__", &IdentityTransformer::operator()) - .def("expected_length", &IdentityTransformer::expected_length); - - py::class_>(m, "GaussianTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &GaussianTransformer::transform) - .def("__call__", &GaussianTransformer::operator()) - .def("expected_length", &GaussianTransformer::expected_length); - - py::class_>(m, "UniformScaler") - .def(py::init>(), py::arg("sampler")) - .def("transform", &UniformScaler::transform) - .def("__call__", &UniformScaler::operator()) - .def("expected_length", &UniformScaler::expected_length); - - py::class_>(m, "LaplaceTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &LaplaceTransformer::transform) - .def("__call__", &LaplaceTransformer::operator()) - .def("expected_length", &LaplaceTransformer::expected_length); - - py::class_>(m, "LogisticTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &LogisticTransformer::transform) - .def("__call__", &LogisticTransformer::operator()) - .def("expected_length", &LogisticTransformer::expected_length); - - py::class_>(m, "CauchyTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &CauchyTransformer::transform) - .def("__call__", &CauchyTransformer::operator()) - .def("expected_length", &CauchyTransformer::expected_length); - - py::class_>(m, "DoubleWeibullTransformer") - .def(py::init>(), py::arg("sampler")) - .def("transform", &DoubleWeibullTransformer::transform) - .def("__call__", &DoubleWeibullTransformer::operator()) - .def("expected_length", &DoubleWeibullTransformer::expected_length); + using namespace sampling; + + auto m = main.def_submodule("sampling"); + + py::class_>(m, "Sampler") + .def_readonly("d", &Sampler::d) + .def("reset", &Sampler::reset) + .def("expected_length", &Sampler::expected_length); + + py::class_>(m, "PySampler") + .def(py::init>(), py::arg("d"), py::arg("function")) + .def("__call__", &PySampler::operator()); + + py::class_>(m, "Gaussian") + .def(py::init(), py::arg("d")) + .def("__call__", &Gaussian::operator()); + + py::class_>(m, "Uniform") + .def(py::init(), py::arg("d")) + .def("__call__", &Uniform::operator()); + + py::class_>(m, "Sobol") + .def(py::init(), py::arg("d")) + .def_readonly("cache", &Sobol::cache) + .def("__call__", &Sobol::operator()); + + py::class_>(m, "Halton") + .def(py::init(), py::arg("d"), py::arg("scrambled") = true) + .def("__call__", &Halton::operator()); + + py::class_>(m, "Mirrored") + .def(py::init>(), py::arg("sampler")) + .def("__call__", 
&Mirrored::operator()); + + py::class_>(m, "CachedSampler") + .def(py::init>(), py::arg("sampler")) + .def(py::init, bool>(), py::arg("cache"), py::arg("transform_ppf") = false) + .def("__call__", &CachedSampler::operator()) + .def_readonly("index", &CachedSampler::index) + .def_readonly("n_samples", &CachedSampler::n_samples) + .def_readonly("cache", &CachedSampler::cache); + + py::class_>(m, "Orthogonal") + .def(py::init, size_t>(), + py::arg("sampler"), py::arg("n_samples")) + .def("__call__", &Orthogonal::operator()); + + py::class_>(m, "SampleTransformer") + .def("raw", &SampleTransformer::raw); + + py::class_>(m, "IdentityTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &IdentityTransformer::transform) + .def("__call__", &IdentityTransformer::operator()) + .def("expected_length", &IdentityTransformer::expected_length); + + py::class_>(m, "GaussianTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &GaussianTransformer::transform) + .def("__call__", &GaussianTransformer::operator()) + .def("expected_length", &GaussianTransformer::expected_length); + + py::class_>(m, "UniformScaler") + .def(py::init>(), py::arg("sampler")) + .def("transform", &UniformScaler::transform) + .def("__call__", &UniformScaler::operator()) + .def("expected_length", &UniformScaler::expected_length); + + py::class_>(m, "LaplaceTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &LaplaceTransformer::transform) + .def("__call__", &LaplaceTransformer::operator()) + .def("expected_length", &LaplaceTransformer::expected_length); + + py::class_>(m, "LogisticTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &LogisticTransformer::transform) + .def("__call__", &LogisticTransformer::operator()) + .def("expected_length", &LogisticTransformer::expected_length); + + py::class_>(m, "CauchyTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &CauchyTransformer::transform) + .def("__call__", &CauchyTransformer::operator()) + .def("expected_length", &CauchyTransformer::expected_length); + + py::class_>(m, "DoubleWeibullTransformer") + .def(py::init>(), py::arg("sampler")) + .def("transform", &DoubleWeibullTransformer::transform) + .def("__call__", &DoubleWeibullTransformer::operator()) + .def("expected_length", &DoubleWeibullTransformer::expected_length); } -void define_utils(py::module &main) +void define_utils(py::module& main) { - auto m = main.def_submodule("utils"); - m.def("cdf", &cdf, py::arg("x")); - m.def("ppf", &ppf, py::arg("x")); - m.def("i8_sobol", &i8_sobol, py::arg("dim_num"), py::arg("seed"), py::arg("quasi")); - m.def("compute_ert", &utils::compute_ert, py::arg("running_times"), py::arg("budget")); - m.def("set_seed", &rng::set_seed, py::arg("seed"), "Set the random seed"); - m.def("random_uniform", &random_double>, "Generate a uniform random number in [0, 1]"); - m.def("random_normal", &random_double>, "Generate a standard normal random number"); - - py::class_(m, "Shuffler") - .def(py::init(), py::arg("start"), py::arg("stop")) - .def(py::init(), py::arg("stop")) - .def("next", &rng::Shuffler::next) - .def_readwrite("start", &rng::Shuffler::start) - .def_readwrite("stop", &rng::Shuffler::stop) - .def_readwrite("n", &rng::Shuffler::n) - .def_readwrite("seed", &rng::Shuffler::seed) - .def_readwrite("offset", &rng::Shuffler::offset) - .def_readwrite("multiplier", &rng::Shuffler::multiplier) - .def_readwrite("modulus", &rng::Shuffler::modulus) - .def_readwrite("found", &rng::Shuffler::found); - - 
py::class_(m, "CachedShuffleSequence") - .def(py::init(), py::arg("dim")) - .def("fill", &rng::CachedShuffleSequence::fill) - .def("get_index", &rng::CachedShuffleSequence::get_index, py::arg("index")) - .def("next", &rng::CachedShuffleSequence::next); + auto m = main.def_submodule("utils"); + m.def("cdf", &cdf, py::arg("x")); + m.def("ppf", &ppf, py::arg("x")); + m.def("i8_sobol", &i8_sobol, py::arg("dim_num"), py::arg("seed"), py::arg("quasi")); + m.def("compute_ert", &utils::compute_ert, py::arg("running_times"), py::arg("budget")); + m.def("set_seed", &rng::set_seed, py::arg("seed"), "Set the random seed"); + m.def("random_uniform", &random_double>, "Generate a uniform random number in [0, 1]"); + m.def("random_normal", &random_double>, "Generate a standard normal random number"); + + py::class_(m, "Shuffler") + .def(py::init(), py::arg("start"), py::arg("stop")) + .def(py::init(), py::arg("stop")) + .def("next", &rng::Shuffler::next) + .def_readwrite("start", &rng::Shuffler::start) + .def_readwrite("stop", &rng::Shuffler::stop) + .def_readwrite("n", &rng::Shuffler::n) + .def_readwrite("seed", &rng::Shuffler::seed) + .def_readwrite("offset", &rng::Shuffler::offset) + .def_readwrite("multiplier", &rng::Shuffler::multiplier) + .def_readwrite("modulus", &rng::Shuffler::modulus) + .def_readwrite("found", &rng::Shuffler::found); + + py::class_(m, "CachedShuffleSequence") + .def(py::init(), py::arg("dim")) + .def("fill", &rng::CachedShuffleSequence::fill) + .def("get_index", &rng::CachedShuffleSequence::get_index, py::arg("index")) + .def("next", &rng::CachedShuffleSequence::next); } -void define_selection(py::module &main) +void define_selection(py::module& main) { - auto m = main.def_submodule("selection"); - using namespace selection; - py::class_>(m, "Elitsm") - .def(py::init<>()) - .def("__call__", &Elitsm::operator(), py::arg("parameters")); - - py::class_>(m, "NoElitsm") - .def(py::init<>()) - .def("__call__", &NoElitsm::operator(), py::arg("parameters")); - - py::class_>(m, "Pairwise") - .def(py::init<>()) - .def("__call__", &Pairwise::operator(), py::arg("parameters")); - - py::class_>(m, "NoPairwise") - .def(py::init<>()) - .def("__call__", &NoPairwise::operator(), py::arg("parameters")); - - py::class_>(m, "Strategy") - .def(py::init(), py::arg("modules")) - .def("select", &Strategy::select, py::arg("parameters")) - .def_readwrite("pairwise", &Strategy::pairwise) - .def_readwrite("elitsm", &Strategy::elitsm); + auto m = main.def_submodule("selection"); + using namespace selection; + py::class_>(m, "Elitsm") + .def(py::init<>()) + .def("__call__", &Elitsm::operator(), py::arg("parameters")); + + py::class_>(m, "NoElitsm") + .def(py::init<>()) + .def("__call__", &NoElitsm::operator(), py::arg("parameters")); + + py::class_>(m, "Pairwise") + .def(py::init<>()) + .def("__call__", &Pairwise::operator(), py::arg("parameters")); + + py::class_>(m, "NoPairwise") + .def(py::init<>()) + .def("__call__", &NoPairwise::operator(), py::arg("parameters")); + + py::class_>(m, "Strategy") + .def(py::init(), py::arg("modules")) + .def("select", &Strategy::select, py::arg("parameters")) + .def_readwrite("pairwise", &Strategy::pairwise) + .def_readwrite("elitsm", &Strategy::elitsm); } -void define_center_placement(py::module &main) +void define_center_placement(py::module& main) { - auto m = main.def_submodule("center"); - using namespace center; - py::class_>(m, "Placement") - .def("__call__", &Placement::operator(), py::arg("parameters")); + auto m = main.def_submodule("center"); + using namespace 
center; + py::class_>(m, "Placement") + .def("__call__", &Placement::operator(), py::arg("parameters")); - py::class_>(m, "X0") - .def(py::init<>()); + py::class_>(m, "X0") + .def(py::init<>()); - py::class_>(m, "Uniform") - .def(py::init<>()); + py::class_>(m, "Uniform") + .def(py::init<>()); - py::class_>(m, "Zero") - .def(py::init<>()); + py::class_>(m, "Zero") + .def(py::init<>()); } -void define_repelling(py::module &main) +void define_repelling(py::module& main) { - using namespace repelling; - auto m = main.def_submodule("repelling"); - - py::class_(m, "TabooPoint") - .def(py::init(), py::arg("solution"), py::arg("radius")) - .def("rejects", &TabooPoint::rejects, py::arg("xi"), py::arg("p"), py::arg("attempts")) - .def("shares_basin", &TabooPoint::shares_basin, py::arg("objective"), py::arg("xi"), py::arg("p")) - .def("calculate_criticality", &TabooPoint::calculate_criticality, py::arg("p")) - .def_readwrite("radius", &TabooPoint::radius) - .def_readwrite("n_rep", &TabooPoint::n_rep) - .def_readwrite("solution", &TabooPoint::solution) - .def_readwrite("shrinkage", &TabooPoint::shrinkage) - .def_readwrite("criticality", &TabooPoint::criticality) - .def("__repr__", [](TabooPoint &tb) { - return ""; - }); - - py::class_>(m, "Repelling") - .def(py::init<>()) - .def("is_rejected", &Repelling::is_rejected, py::arg("xi"), py::arg("p")) - .def("update_archive", &Repelling::update_archive, py::arg("objective"), py::arg("p")) - .def("prepare_sampling", &Repelling::prepare_sampling, py::arg("p")) - .def_readwrite("archive", &Repelling::archive) - .def_readwrite("coverage", &Repelling::coverage) - .def_readwrite("attempts", &Repelling::attempts) - .def_readwrite("C_inv", &Repelling::C_inv); - - py::class_>(m, "NoRepelling") - .def(py::init<>()); - - m.def("euclidian", &distance::euclidian, py::arg("u"), py::arg("v")); - m.def("manhattan", &distance::manhattan, py::arg("u"), py::arg("v")); - m.def("mahanolobis", &distance::mahanolobis, py::arg("u"), py::arg("v"), py::arg("C_inv")); - m.def("hill_valley_test", &distance::hill_valley_test, - py::arg("u"), py::arg("v"), py::arg("f"), py::arg("n_evals")); + using namespace repelling; + auto m = main.def_submodule("repelling"); + + py::class_(m, "TabooPoint") + .def(py::init(), py::arg("solution"), py::arg("radius")) + .def("rejects", &TabooPoint::rejects, py::arg("xi"), py::arg("p"), py::arg("attempts")) + .def("shares_basin", &TabooPoint::shares_basin, py::arg("objective"), py::arg("xi"), py::arg("p")) + .def("calculate_criticality", &TabooPoint::calculate_criticality, py::arg("p")) + .def_readwrite("radius", &TabooPoint::radius) + .def_readwrite("n_rep", &TabooPoint::n_rep) + .def_readwrite("solution", &TabooPoint::solution) + .def_readwrite("shrinkage", &TabooPoint::shrinkage) + .def_readwrite("criticality", &TabooPoint::criticality) + .def("__repr__", [](TabooPoint& tb) { + return ""; + }); + + py::class_>(m, "Repelling") + .def(py::init<>()) + .def("is_rejected", &Repelling::is_rejected, py::arg("xi"), py::arg("p")) + .def("update_archive", &Repelling::update_archive, py::arg("objective"), py::arg("p")) + .def("prepare_sampling", &Repelling::prepare_sampling, py::arg("p")) + .def_readwrite("archive", &Repelling::archive) + .def_readwrite("coverage", &Repelling::coverage) + .def_readwrite("attempts", &Repelling::attempts) + .def_readwrite("C_inv", &Repelling::C_inv); + + py::class_>(m, "NoRepelling") + .def(py::init<>()); + + m.def("euclidian", &distance::euclidian, py::arg("u"), py::arg("v")); + m.def("manhattan", &distance::manhattan, 
py::arg("u"), py::arg("v")); + m.def("mahanolobis", &distance::mahanolobis, py::arg("u"), py::arg("v"), py::arg("C_inv")); + m.def("hill_valley_test", &distance::hill_valley_test, + py::arg("u"), py::arg("v"), py::arg("f"), py::arg("n_evals")); } -void define_matrix_adaptation(py::module &main) +void define_matrix_adaptation(py::module& main) { - using namespace matrix_adaptation; - auto m = main.def_submodule("matrix_adaptation"); - py::class_>(m, "Adaptation") - .def_readwrite("m", &Adaptation::m) - .def_readwrite("m_old", &Adaptation::m_old) - .def_readwrite("dm", &Adaptation::dm) - .def_readwrite("dz", &Adaptation::dz) - .def_readwrite("ps", &Adaptation::ps) - .def_readwrite("dd", &Adaptation::dd) - .def_readwrite("expected_length_z", &Adaptation::expected_length_z) - .def("adapt_evolution_paths", &Adaptation::adapt_evolution_paths, - py::arg("pop"), - py::arg("weights"), - py::arg("mutation"), - py::arg("stats"), - py::arg("mu"), - py::arg("lamb")) - .def("adapt_matrix", &Adaptation::adapt_matrix, - py::arg("weights"), - py::arg("modules"), - py::arg("population"), - py::arg("mu"), - py::arg("settings"), - py::arg("stats")) - .def("restart", &Adaptation::restart, py::arg("settings")) - .def("compute_y", &Adaptation::compute_y, py::arg("zi")) - .def("invert_x", &Adaptation::invert_x, py::arg("xi"), py::arg("sigma")) - .def("invert_y", &Adaptation::invert_y, py::arg("yi")) - .def("__repr__", [](Adaptation &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "CovarianceAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def_readwrite("pc", &CovarianceAdaptation::pc) - .def_readwrite("d", &CovarianceAdaptation::d) - .def_readwrite("B", &CovarianceAdaptation::B) - .def_readwrite("C", &CovarianceAdaptation::C) - .def_readwrite("A", &CovarianceAdaptation::A) - .def_readwrite("inv_root_C", &CovarianceAdaptation::inv_root_C) - .def_readwrite("hs", &CovarianceAdaptation::hs) - .def("adapt_covariance_matrix", &CovarianceAdaptation::adapt_covariance_matrix, - py::arg("weights"), - py::arg("modules"), - py::arg("population"), - py::arg("mu")) - .def("perform_eigendecomposition", &CovarianceAdaptation::perform_eigendecomposition, py::arg("stats")) - .def("__repr__", [](CovarianceAdaptation &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "SeperableAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](SeperableAdaptation &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "OnePlusOneAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](SeperableAdaptation &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "MatrixAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def_readwrite("M", &MatrixAdaptation::M) - .def_readwrite("M_inv", &MatrixAdaptation::M_inv) - .def("__repr__", [](MatrixAdaptation &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "NoAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](None &dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); + using 
namespace matrix_adaptation; + auto m = main.def_submodule("matrix_adaptation"); + py::class_>(m, "Adaptation") + .def_readwrite("m", &Adaptation::m) + .def_readwrite("m_old", &Adaptation::m_old) + .def_readwrite("dm", &Adaptation::dm) + .def_readwrite("dz", &Adaptation::dz) + .def_readwrite("ps", &Adaptation::ps) + .def_readwrite("dd", &Adaptation::dd) + .def_readwrite("expected_length_z", &Adaptation::expected_length_z) + .def("adapt_evolution_paths", &Adaptation::adapt_evolution_paths, + py::arg("pop"), + py::arg("weights"), + py::arg("mutation"), + py::arg("stats"), + py::arg("mu"), + py::arg("lamb")) + .def("adapt_matrix", &Adaptation::adapt_matrix, + py::arg("weights"), + py::arg("modules"), + py::arg("population"), + py::arg("mu"), + py::arg("settings"), + py::arg("stats")) + .def("restart", &Adaptation::restart, py::arg("settings")) + .def("compute_y", &Adaptation::compute_y, py::arg("zi")) + .def("invert_x", &Adaptation::invert_x, py::arg("xi"), py::arg("sigma")) + .def("invert_y", &Adaptation::invert_y, py::arg("yi")) + .def("__repr__", [](Adaptation& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "CovarianceAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def_readwrite("pc", &CovarianceAdaptation::pc) + .def_readwrite("d", &CovarianceAdaptation::d) + .def_readwrite("B", &CovarianceAdaptation::B) + .def_readwrite("C", &CovarianceAdaptation::C) + .def_readwrite("A", &CovarianceAdaptation::A) + .def_readwrite("inv_root_C", &CovarianceAdaptation::inv_root_C) + .def_readwrite("hs", &CovarianceAdaptation::hs) + .def("adapt_covariance_matrix", &CovarianceAdaptation::adapt_covariance_matrix, + py::arg("weights"), + py::arg("modules"), + py::arg("population"), + py::arg("mu")) + .def("perform_eigendecomposition", &CovarianceAdaptation::perform_eigendecomposition, py::arg("stats")) + .def("__repr__", [](CovarianceAdaptation& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "SeperableAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def("__repr__", [](SeperableAdaptation& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "OnePlusOneAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def("__repr__", [](SeperableAdaptation& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "MatrixAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def_readwrite("M", &MatrixAdaptation::M) + .def_readwrite("M_inv", &MatrixAdaptation::M_inv) + .def("__repr__", [](MatrixAdaptation& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "NoAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def("__repr__", [](None& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); } -void define_parameters(py::module &main) +void define_parameters(py::module& main) { - auto m = main.def_submodule("parameters"); - using namespace parameters; - - py::class_(m, "Modules") - .def(py::init<>()) - .def_readwrite("elitist", &Modules::elitist) - .def_readwrite("active", &Modules::active) - .def_readwrite("orthogonal", &Modules::orthogonal) - 
.def_readwrite("sequential_selection", &Modules::sequential_selection) - .def_readwrite("threshold_convergence", &Modules::threshold_convergence) - .def_readwrite("sample_sigma", &Modules::sample_sigma) - .def_readwrite("weights", &Modules::weights) - .def_readwrite("sampler", &Modules::sampler) - .def_readwrite("mirrored", &Modules::mirrored) - .def_readwrite("ssa", &Modules::ssa) - .def_readwrite("bound_correction", &Modules::bound_correction) - .def_readwrite("restart_strategy", &Modules::restart_strategy) - .def_readwrite("repelling_restart", &Modules::repelling_restart) - .def_readwrite("matrix_adaptation", &Modules::matrix_adaptation) - .def_readwrite("center_placement", &Modules::center_placement) - .def_readwrite("sample_transformation", &Modules::sample_transformation) - .def("__repr__", [](Modules &mod) - { return to_string(mod); }); - - py::class_(m, "Solution") - .def(py::init<>()) - .def_readwrite("x", &Solution::x) - .def_readwrite("y", &Solution::y) - .def_readwrite("t", &Solution::t) - .def_readwrite("e", &Solution::e) - .def("__repr__", &Solution::repr); - - py::class_(m, "Stats") - .def(py::init<>()) - .def_readwrite("t", &Stats::t) - .def_readwrite("evaluations", &Stats::evaluations) - .def_readwrite("current_avg", &Stats::current_avg) - .def_readwrite("solutions", &Stats::solutions) - .def_readwrite("centers", &Stats::centers) - .def_readwrite("current_best", &Stats::current_best) - .def_readwrite("global_best", &Stats::global_best) - .def_readwrite("has_improved", &Stats::has_improved) - .def_readwrite("success_ratio", &Stats::success_ratio) - .def_readwrite("last_update", &Stats::last_update) - .def_readwrite("n_updates", &Stats::n_updates) - .def("__repr__", [](Stats &stats) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_(m, "Weights") - .def( - py::init(), - py::arg("dimension"), - py::arg("mu0"), - py::arg("lambda0"), - py::arg("modules")) - .def_readwrite("mueff", &Weights::mueff) - .def_readwrite("mueff_neg", &Weights::mueff_neg) - .def_readwrite("c1", &Weights::c1) - .def_readwrite("cmu", &Weights::cmu) - .def_readwrite("cc", &Weights::cc) - .def_readwrite("weights", &Weights::weights) - .def_readwrite("positive", &Weights::positive) - .def_readwrite("negative", &Weights::negative) - .def("__repr__", [](Weights &weights) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - py::class_>(m, "Settings") - .def(py::init, std::optional, size_to, size_to, std::optional, - std::optional, std::optional, std::optional, - std::optional, std::optional, - std::optional, std::optional, std::optional, - std::optional, bool>(), - py::arg("dim"), - py::arg("modules") = std::nullopt, - py::arg("target") = std::nullopt, - py::arg("max_generations") = std::nullopt, - py::arg("budget") = std::nullopt, - py::arg("sigma0") = std::nullopt, - py::arg("lambda0") = std::nullopt, - py::arg("mu0") = std::nullopt, - py::arg("x0") = std::nullopt, - py::arg("lb") = std::nullopt, - py::arg("ub") = std::nullopt, - py::arg("cs") = std::nullopt, - py::arg("cc") = std::nullopt, - py::arg("cmu") = std::nullopt, - py::arg("c1") = std::nullopt, - py::arg("verbose") = false) - .def_readonly("dim", &Settings::dim) - .def_readonly("modules", &Settings::modules) - .def_readwrite("target", &Settings::target) - .def_readwrite("max_generations", &Settings::max_generations) - .def_readwrite("budget", &Settings::budget) - .def_readwrite("sigma0", &Settings::sigma0) - .def_readwrite("lambda0", &Settings::lambda0) - 
.def_readwrite("mu0", &Settings::mu0) - .def_readwrite("x0", &Settings::x0) - .def_readwrite("lb", &Settings::lb) - .def_readwrite("ub", &Settings::ub) - .def_readwrite("cs", &Settings::cs) - .def_readwrite("cc", &Settings::cc) - .def_readwrite("cmu", &Settings::cmu) - .def_readwrite("c1", &Settings::c1) - .def_readwrite("verbose", &Settings::verbose) - .def_readonly("volume", &Settings::volume) - .def("__repr__", [](Settings &settings) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); - - ; - - using AdaptationType = std::variant< - std::shared_ptr, - std::shared_ptr, - std::shared_ptr, - std::shared_ptr, - std::shared_ptr>; - - py::class_>(main, "Parameters") - .def(py::init(), py::arg("dimension")) - .def(py::init(), py::arg("settings")) - .def("adapt", &Parameters::adapt) - .def("start", &Parameters::start, py::arg("objective")) - .def("perform_restart", &Parameters::perform_restart, py::arg("objective"), - py::arg("sigma") = std::nullopt) - .def_readwrite("settings", &Parameters::settings) - .def_readwrite("mu", &Parameters::mu) - .def_readwrite("lamb", &Parameters::lambda) - .def_property( - "adaptation", - [](Parameters &self) -> AdaptationType - { - switch (self.settings.modules.matrix_adaptation) - { - case MatrixAdaptationType::MATRIX: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::NONE: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::SEPERABLE: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::ONEPLUSONE: - return std::dynamic_pointer_cast(self.adaptation); - default: - case MatrixAdaptationType::COVARIANCE: - return std::dynamic_pointer_cast(self.adaptation); - } - }, - [](Parameters &self, std::shared_ptr adaptation) - { - self.adaptation = adaptation; - }) - .def_readwrite("criteria", &Parameters::criteria) - .def_readwrite("stats", &Parameters::stats) - .def_readwrite("weights", &Parameters::weights) - .def_readwrite("pop", &Parameters::pop) - .def_readwrite("old_pop", &Parameters::old_pop) - .def_readwrite("sampler", &Parameters::sampler) - .def_readwrite("mutation", &Parameters::mutation) - .def_readwrite("selection", &Parameters::selection) - .def_readwrite("restart_strategy", &Parameters::restart_strategy) - .def_readwrite("repelling", &Parameters::repelling) - .def_readwrite("bounds", &Parameters::bounds) - .def_readwrite("center_placement", &Parameters::center_placement); + auto m = main.def_submodule("parameters"); + using namespace parameters; + + py::class_(m, "Modules") + .def(py::init<>()) + .def_readwrite("elitist", &Modules::elitist) + .def_readwrite("active", &Modules::active) + .def_readwrite("orthogonal", &Modules::orthogonal) + .def_readwrite("sequential_selection", &Modules::sequential_selection) + .def_readwrite("threshold_convergence", &Modules::threshold_convergence) + .def_readwrite("sample_sigma", &Modules::sample_sigma) + .def_readwrite("weights", &Modules::weights) + .def_readwrite("sampler", &Modules::sampler) + .def_readwrite("mirrored", &Modules::mirrored) + .def_readwrite("ssa", &Modules::ssa) + .def_readwrite("bound_correction", &Modules::bound_correction) + .def_readwrite("restart_strategy", &Modules::restart_strategy) + .def_readwrite("repelling_restart", &Modules::repelling_restart) + .def_readwrite("matrix_adaptation", &Modules::matrix_adaptation) + .def_readwrite("center_placement", &Modules::center_placement) + .def_readwrite("sample_transformation", &Modules::sample_transformation) + .def("__repr__", 
[](Modules& mod) + { return to_string(mod); }); + + py::class_(m, "Solution") + .def(py::init<>()) + .def_readwrite("x", &Solution::x) + .def_readwrite("y", &Solution::y) + .def_readwrite("t", &Solution::t) + .def_readwrite("e", &Solution::e) + .def("__repr__", &Solution::repr); + + py::class_(m, "Stats") + .def(py::init<>()) + .def_readwrite("t", &Stats::t) + .def_readwrite("evaluations", &Stats::evaluations) + .def_readwrite("current_avg", &Stats::current_avg) + .def_readwrite("solutions", &Stats::solutions) + .def_readwrite("centers", &Stats::centers) + .def_readwrite("current_best", &Stats::current_best) + .def_readwrite("global_best", &Stats::global_best) + .def_readwrite("has_improved", &Stats::has_improved) + .def_readwrite("success_ratio", &Stats::success_ratio) + .def_readwrite("last_update", &Stats::last_update) + .def_readwrite("n_updates", &Stats::n_updates) + .def("__repr__", [](Stats& stats) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_(m, "Weights") + .def( + py::init(), + py::arg("dimension"), + py::arg("mu0"), + py::arg("lambda0"), + py::arg("modules"), + py::arg("expected_length_z") + ) + .def_readwrite("mueff", &Weights::mueff) + .def_readwrite("mueff_neg", &Weights::mueff_neg) + .def_readwrite("c1", &Weights::c1) + .def_readwrite("cmu", &Weights::cmu) + .def_readwrite("cc", &Weights::cc) + .def_readwrite("cs", &Weights::cs) + .def_readwrite("damps", &Weights::damps) + .def_readwrite("sqrt_cc_mueff", &Weights::sqrt_cc_mueff) + .def_readwrite("sqrt_cs_mueff", &Weights::sqrt_cs_mueff) + .def_readwrite("lazy_update_interval", &Weights::lazy_update_interval) + .def_readwrite("sigma_path_scale", &Weights::sigma_path_scale) + .def_readwrite("expected_length_z", &Weights::expected_length_z) + .def_readwrite("expected_length_ps", &Weights::expected_length_ps) + .def_readwrite("weights", &Weights::weights) + .def_readwrite("positive", &Weights::positive) + .def_readwrite("negative", &Weights::negative) + .def("__repr__", [](Weights& weights) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "Settings") + .def(py::init, std::optional, size_to, size_to, std::optional, + std::optional, std::optional, std::optional, + std::optional, std::optional, + std::optional, std::optional, std::optional, + std::optional, bool>(), + py::arg("dim"), + py::arg("modules") = std::nullopt, + py::arg("target") = std::nullopt, + py::arg("max_generations") = std::nullopt, + py::arg("budget") = std::nullopt, + py::arg("sigma0") = std::nullopt, + py::arg("lambda0") = std::nullopt, + py::arg("mu0") = std::nullopt, + py::arg("x0") = std::nullopt, + py::arg("lb") = std::nullopt, + py::arg("ub") = std::nullopt, + py::arg("cs") = std::nullopt, + py::arg("cc") = std::nullopt, + py::arg("cmu") = std::nullopt, + py::arg("c1") = std::nullopt, + py::arg("verbose") = false) + .def_readonly("dim", &Settings::dim) + .def_readonly("modules", &Settings::modules) + .def_readwrite("target", &Settings::target) + .def_readwrite("max_generations", &Settings::max_generations) + .def_readwrite("budget", &Settings::budget) + .def_readwrite("sigma0", &Settings::sigma0) + .def_readwrite("lambda0", &Settings::lambda0) + .def_readwrite("mu0", &Settings::mu0) + .def_readwrite("x0", &Settings::x0) + .def_readwrite("lb", &Settings::lb) + .def_readwrite("ub", &Settings::ub) + .def_readwrite("cs", &Settings::cs) + .def_readwrite("cc", &Settings::cc) + .def_readwrite("cmu", &Settings::cmu) + .def_readwrite("c1", &Settings::c1) 
+ .def_readwrite("verbose", &Settings::verbose) + .def_readonly("volume", &Settings::volume) + .def("__repr__", [](Settings& settings) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + ; + + using AdaptationType = std::variant< + std::shared_ptr, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + + py::class_>(main, "Parameters") + .def(py::init(), py::arg("dimension")) + .def(py::init(), py::arg("settings")) + .def("adapt", &Parameters::adapt) + .def("start", &Parameters::start, py::arg("objective")) + .def("perform_restart", &Parameters::perform_restart, py::arg("objective"), + py::arg("sigma") = std::nullopt) + .def_readwrite("settings", &Parameters::settings) + .def_readwrite("mu", &Parameters::mu) + .def_readwrite("lamb", &Parameters::lambda) + .def_property( + "adaptation", + [](Parameters& self) -> AdaptationType + { + switch (self.settings.modules.matrix_adaptation) + { + case MatrixAdaptationType::MATRIX: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::NONE: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::SEPERABLE: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::ONEPLUSONE: + return std::dynamic_pointer_cast(self.adaptation); + default: + case MatrixAdaptationType::COVARIANCE: + return std::dynamic_pointer_cast(self.adaptation); + } + }, + [](Parameters& self, std::shared_ptr adaptation) + { + self.adaptation = adaptation; + }) + .def_readwrite("criteria", &Parameters::criteria) + .def_readwrite("stats", &Parameters::stats) + .def_readwrite("weights", &Parameters::weights) + .def_readwrite("pop", &Parameters::pop) + .def_readwrite("old_pop", &Parameters::old_pop) + .def_readwrite("sampler", &Parameters::sampler) + .def_readwrite("mutation", &Parameters::mutation) + .def_readwrite("selection", &Parameters::selection) + .def_readwrite("restart_strategy", &Parameters::restart_strategy) + .def_readwrite("repelling", &Parameters::repelling) + .def_readwrite("bounds", &Parameters::bounds) + .def_readwrite("center_placement", &Parameters::center_placement); } -void define_bounds(py::module &main) +void define_bounds(py::module& main) { - auto m = main.def_submodule("bounds"); - using namespace bounds; + auto m = main.def_submodule("bounds"); + using namespace bounds; - py::class_>(m, "BoundCorrection") - .def_readwrite("lb", &BoundCorrection::lb) - .def_readwrite("ub", &BoundCorrection::ub) - .def_readwrite("db", &BoundCorrection::db) - .def_readwrite("diameter", &BoundCorrection::diameter) - .def_readonly("n_out_of_bounds", &BoundCorrection::n_out_of_bounds) - .def("correct", &BoundCorrection::correct, - py::arg("population"), py::arg("m")); + py::class_>(m, "BoundCorrection") + .def_readwrite("lb", &BoundCorrection::lb) + .def_readwrite("ub", &BoundCorrection::ub) + .def_readwrite("db", &BoundCorrection::db) + .def_readwrite("diameter", &BoundCorrection::diameter) + .def_readonly("n_out_of_bounds", &BoundCorrection::n_out_of_bounds) + .def("correct", &BoundCorrection::correct, + py::arg("population"), py::arg("m")); - py::class_>(m, "Resample") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "Resample") + .def(py::init(), py::arg("lb"), py::arg("ub")); - py::class_>(m, "NoCorrection") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "NoCorrection") + .def(py::init(), py::arg("lb"), py::arg("ub")); - py::class_>(m, "COTN") - .def(py::init(), py::arg("lb"), py::arg("ub")) - 
.def_readonly("sampler", &COTN::sampler); + py::class_>(m, "COTN") + .def(py::init(), py::arg("lb"), py::arg("ub")) + .def_readonly("sampler", &COTN::sampler); - py::class_>(m, "Mirror") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "Mirror") + .def(py::init(), py::arg("lb"), py::arg("ub")); - py::class_>(m, "UniformResample") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "UniformResample") + .def(py::init(), py::arg("lb"), py::arg("ub")); - py::class_>(m, "Saturate") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "Saturate") + .def(py::init(), py::arg("lb"), py::arg("ub")); - py::class_>(m, "Toroidal") - .def(py::init(), py::arg("lb"), py::arg("ub")); + py::class_>(m, "Toroidal") + .def(py::init(), py::arg("lb"), py::arg("ub")); } -void define_mutation(py::module &main) +void define_mutation(py::module& main) { - auto m = main.def_submodule("mutation"); - using namespace mutation; - - py::class_>(m, "ThresholdConvergence") - .def(py::init<>()) - .def_readwrite("init_threshold", &ThresholdConvergence::init_threshold) - .def_readwrite("decay_factor", &ThresholdConvergence::decay_factor) - .def("scale", &ThresholdConvergence::scale, py::arg("population"), py::arg("diameter"), py::arg("budget"), py::arg("evaluations")); - - py::class_>(m, "NoThresholdConvergence") - .def(py::init<>()); - - py::class_>(m, "SequentialSelection") - .def(py::init(), - py::arg("mirror"), - py::arg("mu"), - py::arg("seq_cuttoff_factor") = 1.0) - .def("break_conditions", &SequentialSelection::break_conditions, - py::arg("i"), - py::arg("f"), - py::arg("fopt"), - py::arg("mirror")); - - py::class_>(m, "NoSequentialSelection") - .def(py::init(), - py::arg("mirror"), - py::arg("mu"), - py::arg("seq_cuttoff_factor") = 1.0); - - py::class_>(m, "SigmaSampler") - .def(py::init(), py::arg("dimension")) - .def_readwrite("beta", &SigmaSampler::beta) - .def("sample", &SigmaSampler::sample, py::arg("sigma"), py::arg("population")); - - py::class_>(m, "NoSigmaSampler") - .def(py::init(), py::arg("dimension")); - - py::class_>(m, "Strategy") - .def("adapt", &Strategy::adapt, py::arg("weights"), - py::arg("dynamic"), - py::arg("population"), - py::arg("old_population"), - py::arg("stats"), - py::arg("lamb")) - .def_readwrite("threshold_convergence", &Strategy::tc) - .def_readwrite("sequential_selection", &Strategy::sq) - .def_readwrite("sigma_sampler", &Strategy::ss) - .def_readwrite("cs", &Strategy::cs) - .def_readwrite("sigma", &Strategy::sigma) - .def_readwrite("s", &Strategy::s); - - py::class_>(m, "CSA") - .def( - py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")) - .def_readwrite("damps", &CSA::damps) - .def_readwrite("expected_length_z", &CSA::expected_length_z) - .def( - "mutate", &CSA::mutate, py::arg("objective"), - py::arg("n_offspring"), - py::arg("parameters")); - - py::class_>(m, "TPA") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")) - .def_readwrite("a_tpa", &TPA::a_tpa) - .def_readwrite("b_tpa", &TPA::b_tpa) - .def_readwrite("rank_tpa", &TPA::rank_tpa); - - py::class_>(m, "MSR") - .def(py::init, std::shared_ptr, 
std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")); - - py::class_>(m, "PSR") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")) - .def_readwrite("success_ratio", &PSR::success_ratio); - - py::class_>(m, "XNES") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")); - - py::class_>(m, "MXNES") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")); - - py::class_>(m, "LPXNES") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")); - - py::class_>(m, "SR") - .def(py::init, std::shared_ptr, std::shared_ptr, Float, Float, Float, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("cs"), - py::arg("damps"), - py::arg("sigma0"), - py::arg("expected_length_z")) - // .def_staticreadwrite("tgt_success_ratio", &SR::tgt_success_ratio) - ; + auto m = main.def_submodule("mutation"); + using namespace mutation; + + py::class_>(m, "ThresholdConvergence") + .def(py::init<>()) + .def_readwrite("init_threshold", &ThresholdConvergence::init_threshold) + .def_readwrite("decay_factor", &ThresholdConvergence::decay_factor) + .def("scale", &ThresholdConvergence::scale, py::arg("population"), py::arg("diameter"), py::arg("budget"), py::arg("evaluations")); + + py::class_>(m, "NoThresholdConvergence") + .def(py::init<>()); + + py::class_>(m, "SequentialSelection") + .def(py::init(), + py::arg("mirror"), + py::arg("mu"), + py::arg("seq_cuttoff_factor") = 1.0) + .def("break_conditions", &SequentialSelection::break_conditions, + py::arg("i"), + py::arg("f"), + py::arg("fopt"), + py::arg("mirror")); + + py::class_>(m, "NoSequentialSelection") + .def(py::init(), + py::arg("mirror"), + py::arg("mu"), + py::arg("seq_cuttoff_factor") = 1.0); + + py::class_>(m, "SigmaSampler") + .def(py::init(), py::arg("dimension")) + .def_readwrite("beta", &SigmaSampler::beta) + .def("sample", &SigmaSampler::sample, py::arg("sigma"), py::arg("population")); + + py::class_>(m, "NoSigmaSampler") + .def(py::init(), py::arg("dimension")); + + py::class_>(m, "Strategy") + .def("adapt", &Strategy::adapt, py::arg("weights"), + py::arg("dynamic"), + py::arg("population"), + py::arg("old_population"), + py::arg("stats"), + py::arg("lamb")) + .def_readwrite("threshold_convergence", &Strategy::tc) + .def_readwrite("sequential_selection", &Strategy::sq) + .def_readwrite("sigma_sampler", &Strategy::ss) + .def_readwrite("sigma", &Strategy::sigma) + .def_readwrite("s", &Strategy::s); + + py::class_>(m, "CSA") + .def( + py::init, std::shared_ptr, 
std::shared_ptr, Float>(), + py::arg("threshold_convergence"), + py::arg("sequential_selection"), + py::arg("sigma_sampler"), + py::arg("sigma0")) + .def( + "mutate", &CSA::mutate, py::arg("objective"), + py::arg("n_offspring"), + py::arg("parameters")); + + py::class_>(m, "TPA") + .def_readwrite("a_tpa", &TPA::a_tpa) + .def_readwrite("b_tpa", &TPA::b_tpa) + .def_readwrite("rank_tpa", &TPA::rank_tpa); + + py::class_>(m, "MSR") + ; + + py::class_>(m, "PSR") + .def_readwrite("success_ratio", &PSR::success_ratio); + + py::class_>(m, "XNES") + ; + + py::class_>(m, "MXNES") + ; + + py::class_>(m, "LPXNES") + ; + + py::class_>(m, "SR") + // .def_staticreadwrite("tgt_success_ratio", &SR::tgt_success_ratio) + ; } -void define_population(py::module &main) +void define_population(py::module& main) { - py::class_(main, "Population") - .def(py::init(), py::arg("dimension"), py::arg("n")) - .def(py::init(), py::arg("X"), py::arg("Z"), py::arg("Y"), py::arg("f"), py::arg("s")) - .def("sort", &Population::sort) - .def("resize_cols", &Population::resize_cols, py::arg("size")) - .def("keep_only", &Population::keep_only, py::arg("idx")) - .def_property_readonly("n_finite", &Population::n_finite) - .def("__add__", &Population::operator+=, py::arg("other")) - .def_readwrite("X", &Population::X) - .def_readwrite("Z", &Population::Z) - .def_readwrite("Y", &Population::Y) - .def_readwrite("f", &Population::f) - .def_readwrite("s", &Population::s) - .def_readwrite("d", &Population::d) - .def_readwrite("n", &Population::n); + py::class_(main, "Population") + .def(py::init(), py::arg("dimension"), py::arg("n")) + .def(py::init(), py::arg("X"), py::arg("Z"), py::arg("Y"), py::arg("f"), py::arg("s")) + .def("sort", &Population::sort) + .def("resize_cols", &Population::resize_cols, py::arg("size")) + .def("keep_only", &Population::keep_only, py::arg("idx")) + .def_property_readonly("n_finite", &Population::n_finite) + .def("__add__", &Population::operator+=, py::arg("other")) + .def_readwrite("X", &Population::X) + .def_readwrite("Z", &Population::Z) + .def_readwrite("Y", &Population::Y) + .def_readwrite("f", &Population::f) + .def_readwrite("s", &Population::s) + .def_readwrite("d", &Population::d) + .def_readwrite("n", &Population::n); } class constants_w { }; -void define_constants(py::module &m) +void define_constants(py::module& m) { - py::class_(m, "constants") - .def_property_static( - "cache_max_doubles", - [](py::object) - { return constants::cache_max_doubles; }, - [](py::object, size_t a) - { constants::cache_max_doubles = a; }) - .def_property_static( - "cache_min_samples", - [](py::object) - { return constants::cache_min_samples; }, - [](py::object, size_t a) - { constants::cache_min_samples = a; }) - .def_property_static( - "cache_samples", - [](py::object) - { return constants::cache_samples; }, - [](py::object, bool a) - { constants::cache_samples = a; }) - .def_property_static( - "clip_sigma", - [](py::object) - { return constants::clip_sigma; }, - [](py::object, bool a) - { constants::clip_sigma = a; }) - .def_property_static( - "calc_eigv", - [](py::object) - { return constants::calc_eigv; }, - [](py::object, bool a) - { constants::calc_eigv = a; }) - ; + py::class_(m, "constants") + .def_property_static( + "cache_max_doubles", + [](py::object) + { return constants::cache_max_doubles; }, + [](py::object, size_t a) + { constants::cache_max_doubles = a; }) + .def_property_static( + "cache_min_samples", + [](py::object) + { return constants::cache_min_samples; }, + [](py::object, size_t a) + { 
constants::cache_min_samples = a; }) + .def_property_static( + "cache_samples", + [](py::object) + { return constants::cache_samples; }, + [](py::object, bool a) + { constants::cache_samples = a; }) + .def_property_static( + "clip_sigma", + [](py::object) + { return constants::clip_sigma; }, + [](py::object, bool a) + { constants::clip_sigma = a; }) + .def_property_static( + "calc_eigv", + [](py::object) + { return constants::calc_eigv; }, + [](py::object, bool a) + { constants::calc_eigv = a; }) + ; } -struct PyCriterion: restart::Criterion +struct PyCriterion : restart::Criterion { - PyCriterion(const std::string& name): restart::Criterion(name) {} + PyCriterion(const std::string& name) : restart::Criterion(name) {} - void update(const parameters::Parameters &p) override - { - PYBIND11_OVERRIDE_PURE(void, restart::Criterion, update, p); - } + void update(const parameters::Parameters& p) override + { + PYBIND11_OVERRIDE_PURE(void, restart::Criterion, update, p); + } - void on_reset(const parameters::Parameters &p) override - { - PYBIND11_OVERRIDE(void, restart::Criterion, on_reset, p); - } + void on_reset(const parameters::Parameters& p) override + { + PYBIND11_OVERRIDE(void, restart::Criterion, on_reset, p); + } }; -void define_restart_criteria(py::module &main) +void define_restart_criteria(py::module& main) { - auto m = main.def_submodule("restart"); - using namespace restart; - - py::class_>(m, "Criterion") - .def(py::init(), py::arg("name")) - .def("on_reset", &Criterion::on_reset, py::arg("parameters")) - .def("update", &Criterion::update, py::arg("parameters")) - .def("reset", &Criterion::reset, py::arg("parameters")) - .def_readwrite("met", &Criterion::met) - .def_readwrite("name", &Criterion::name) - .def_readwrite("last_restart", &Criterion::last_restart) - .def("__repr__", [](Criterion &self) - { return "<" + self.name + " met: " + std::to_string(self.met) + ">"; }); - ; - - py::class_>(m, "ExceededMaxIter") - .def(py::init<>()) - .def_readwrite("max_iter", &ExceededMaxIter::max_iter); - - py::class_>(m, "NoImprovement") - .def(py::init<>()) - .def_readwrite("n_bin", &NoImprovement::n_bin) - .def_readwrite("best_fitnesses", &NoImprovement::best_fitnesses); - - py::class_>(m, "MaxSigma") - .def(py::init<>()) - .def_readwrite_static("tolerance", &MaxSigma::tolerance); - - py::class_>(m, "MinSigma") - .def(py::init<>()) - .def_readwrite_static("tolerance", &MinSigma::tolerance); - - py::class_>(m, "UnableToAdapt") - .def(py::init<>()); - - py::class_>(m, "FlatFitness") - .def(py::init<>()) - .def_readwrite("max_flat_fitness", &FlatFitness::max_flat_fitness) - .def_readwrite("flat_fitness_index", &FlatFitness::flat_fitness_index) - .def_readwrite("flat_fitnesses", &FlatFitness::flat_fitnesses); - - py::class_>(m, "TolX") - .def(py::init<>()) - .def_readwrite("tolx_vector", &TolX::tolx_vector) - .def_readwrite_static("tolerance", &TolX::tolerance) - ; - - py::class_>(m, "MaxDSigma") - .def(py::init<>()) - .def_readwrite_static("tolerance", &MaxDSigma::tolerance); - - py::class_>(m, "MinDSigma") - .def(py::init<>()) - .def_readwrite_static("tolerance", &MinDSigma::tolerance); - - py::class_>(m, "ConditionC") - .def(py::init<>()) - .def_readwrite_static("tolerance", &ConditionC::tolerance); - - py::class_>(m, "NoEffectAxis") - .def(py::init<>()) - .def_readwrite_static("tolerance", &NoEffectAxis::tolerance) - ; - - py::class_>(m, "NoEffectCoord") - .def(py::init<>()) - .def_readwrite_static("tolerance", &NoEffectCoord::tolerance); - - py::class_>(m, "Stagnation") - .def(py::init<>()) 
- .def_readwrite("n_stagnation", &Stagnation::n_stagnation) - .def_readwrite("median_fitnesses", &Stagnation::median_fitnesses) - .def_readwrite("best_fitnesses", &Stagnation::best_fitnesses) - .def_readwrite_static("tolerance", &Stagnation::tolerance); - - py::class_(m, "Criteria") - .def_readwrite("items", &Criteria::items) - .def("reset", &Criteria::reset, py::arg("parameters")) - .def("update", &Criteria::update, py::arg("parameters")) - .def("any", &Criteria::any); + auto m = main.def_submodule("restart"); + using namespace restart; + + py::class_>(m, "Criterion") + .def(py::init(), py::arg("name")) + .def("on_reset", &Criterion::on_reset, py::arg("parameters")) + .def("update", &Criterion::update, py::arg("parameters")) + .def("reset", &Criterion::reset, py::arg("parameters")) + .def_readwrite("met", &Criterion::met) + .def_readwrite("name", &Criterion::name) + .def_readwrite("last_restart", &Criterion::last_restart) + .def("__repr__", [](Criterion& self) + { return "<" + self.name + " met: " + std::to_string(self.met) + ">"; }); + ; + + py::class_>(m, "ExceededMaxIter") + .def(py::init<>()) + .def_readwrite("max_iter", &ExceededMaxIter::max_iter); + + py::class_>(m, "NoImprovement") + .def(py::init<>()) + .def_readwrite("n_bin", &NoImprovement::n_bin) + .def_readwrite("best_fitnesses", &NoImprovement::best_fitnesses); + + py::class_>(m, "MaxSigma") + .def(py::init<>()) + .def_readwrite_static("tolerance", &MaxSigma::tolerance); + + py::class_>(m, "MinSigma") + .def(py::init<>()) + .def_readwrite_static("tolerance", &MinSigma::tolerance); + + py::class_>(m, "UnableToAdapt") + .def(py::init<>()); + + py::class_>(m, "FlatFitness") + .def(py::init<>()) + .def_readwrite("max_flat_fitness", &FlatFitness::max_flat_fitness) + .def_readwrite("flat_fitness_index", &FlatFitness::flat_fitness_index) + .def_readwrite("flat_fitnesses", &FlatFitness::flat_fitnesses); + + py::class_>(m, "TolX") + .def(py::init<>()) + .def_readwrite("tolx_vector", &TolX::tolx_vector) + .def_readwrite_static("tolerance", &TolX::tolerance) + ; + + py::class_>(m, "MaxDSigma") + .def(py::init<>()) + .def_readwrite_static("tolerance", &MaxDSigma::tolerance); + + py::class_>(m, "MinDSigma") + .def(py::init<>()) + .def_readwrite_static("tolerance", &MinDSigma::tolerance); + + py::class_>(m, "ConditionC") + .def(py::init<>()) + .def_readwrite_static("tolerance", &ConditionC::tolerance); + + py::class_>(m, "NoEffectAxis") + .def(py::init<>()) + .def_readwrite_static("tolerance", &NoEffectAxis::tolerance) + ; + + py::class_>(m, "NoEffectCoord") + .def(py::init<>()) + .def_readwrite_static("tolerance", &NoEffectCoord::tolerance); + + py::class_>(m, "Stagnation") + .def(py::init<>()) + .def_readwrite("n_stagnation", &Stagnation::n_stagnation) + .def_readwrite("median_fitnesses", &Stagnation::median_fitnesses) + .def_readwrite("best_fitnesses", &Stagnation::best_fitnesses) + .def_readwrite_static("tolerance", &Stagnation::tolerance); + + py::class_(m, "Criteria") + .def_readwrite("items", &Criteria::items) + .def("reset", &Criteria::reset, py::arg("parameters")) + .def("update", &Criteria::update, py::arg("parameters")) + .def("any", &Criteria::any); } -void define_restart_strategy(py::module &main) +void define_restart_strategy(py::module& main) { - auto m = main.def_submodule("restart"); - using namespace restart; - - py::class_>(m, "Strategy") - // .def("evaluate", &Strategy::evaluate, py::arg("objective"), py::arg("parameters")) - // .def_readwrite("criteria", &Strategy::criteria) - .def("update", &Strategy::update, 
py::arg("parameters")); - ; - - py::class_>(m, "IPOP") - // .def(py::init(), py::arg("sigma"), py::arg("dimension"), py::arg("lamb")) - .def_readwrite("ipop_factor", &IPOP::ipop_factor); - - py::class_>(m, "BIPOP") - // .def(py::init(), py::arg("sigma"), py::arg("dimension"), py::arg("lamb"), py::arg("mu"), py::arg("budget")) - .def("large", &BIPOP::large) - .def_readwrite("mu_factor", &BIPOP::mu_factor) - .def_readwrite("lambda_init", &BIPOP::lambda_init) - .def_readwrite("budget", &BIPOP::budget) - .def_readwrite("lambda_large", &BIPOP::lambda_large) - .def_readwrite("lambda_small", &BIPOP::lambda_small) - .def_readwrite("budget_small", &BIPOP::budget_small) - .def_readwrite("budget_large", &BIPOP::budget_large) - .def_readonly("used_budget", &BIPOP::used_budget); + auto m = main.def_submodule("restart"); + using namespace restart; + + py::class_>(m, "Strategy") + // .def("evaluate", &Strategy::evaluate, py::arg("objective"), py::arg("parameters")) + // .def_readwrite("criteria", &Strategy::criteria) + .def("update", &Strategy::update, py::arg("parameters")); + ; + + py::class_>(m, "IPOP") + // .def(py::init(), py::arg("sigma"), py::arg("dimension"), py::arg("lamb")) + .def_readwrite("ipop_factor", &IPOP::ipop_factor); + + py::class_>(m, "BIPOP") + // .def(py::init(), py::arg("sigma"), py::arg("dimension"), py::arg("lamb"), py::arg("mu"), py::arg("budget")) + .def("large", &BIPOP::large) + .def_readwrite("mu_factor", &BIPOP::mu_factor) + .def_readwrite("lambda_init", &BIPOP::lambda_init) + .def_readwrite("budget", &BIPOP::budget) + .def_readwrite("lambda_large", &BIPOP::lambda_large) + .def_readwrite("lambda_small", &BIPOP::lambda_small) + .def_readwrite("budget_small", &BIPOP::budget_small) + .def_readwrite("budget_large", &BIPOP::budget_large) + .def_readonly("used_budget", &BIPOP::used_budget); } -void define_cmaes(py::module &m) +void define_cmaes(py::module& m) { - py::class_(m, "ModularCMAES") - .def(py::init>(), py::arg("parameters")) - .def(py::init(), py::arg("dimension")) - .def(py::init(), py::arg("settings")) - .def("recombine", &ModularCMAES::recombine) - .def("mutate", &ModularCMAES::mutate, py::arg("objective")) - .def("select", &ModularCMAES::select) - .def("adapt", &ModularCMAES::adapt) - .def("step", &ModularCMAES::step, py::arg("objective")) - .def("__call__", &ModularCMAES::operator(), py::arg("objective")) - .def("run", &ModularCMAES::operator(), py::arg("objective")) - .def("break_conditions", &ModularCMAES::break_conditions) - .def_readonly("p", &ModularCMAES::p); + py::class_(m, "ModularCMAES") + .def(py::init>(), py::arg("parameters")) + .def(py::init(), py::arg("dimension")) + .def(py::init(), py::arg("settings")) + .def("recombine", &ModularCMAES::recombine) + .def("mutate", &ModularCMAES::mutate, py::arg("objective")) + .def("select", &ModularCMAES::select) + .def("adapt", &ModularCMAES::adapt) + .def("step", &ModularCMAES::step, py::arg("objective")) + .def("__call__", &ModularCMAES::operator(), py::arg("objective")) + .def("run", &ModularCMAES::operator(), py::arg("objective")) + .def("break_conditions", &ModularCMAES::break_conditions) + .def_readonly("p", &ModularCMAES::p); } -void define_es(py::module &main) +void define_es(py::module& main) { - auto m = main.def_submodule("es"); - parameters::Modules default_modules; - using namespace es; - py::class_>(m, "OnePlusOneES") - .def( - py::init< - size_t, - Vector, - Float, - Float, - size_t, - Float, - parameters::Modules>(), - py::arg("d"), - py::arg("x0"), - py::arg("f0"), - py::arg("sigma0") = 1.0, - 
py::arg("budget") = 10'000, - py::arg("target") = 1e-8, - py::arg("modules") = default_modules) - .def("__call__", &OnePlusOneES::operator()) - .def("step", &OnePlusOneES::step) - .def("sample", &OnePlusOneES::sample) - .def_readwrite("d", &OnePlusOneES::d) - .def_readwrite("sigma", &OnePlusOneES::sigma) - .def_readwrite("decay", &OnePlusOneES::decay) - .def_readwrite("x", &OnePlusOneES::x) - .def_readwrite("f", &OnePlusOneES::f) - .def_readwrite("t", &OnePlusOneES::t) - .def_readwrite("budget", &OnePlusOneES::budget) - .def_readwrite("target", &OnePlusOneES::target) - .def_readwrite("sampler", &OnePlusOneES::sampler) - .def_readwrite("rejection_sampling", &OnePlusOneES::rejection_sampling) - .def_readwrite("corrector", &OnePlusOneES::corrector); - - py::class_>(m, "MuCommaLambdaES") - .def( - py::init< - size_t, - Vector, - Float, - size_t, - Float, - parameters::Modules>(), - py::arg("d"), - py::arg("x0"), - py::arg("sigma0") = 1.0, - py::arg("budget") = 10'000, - py::arg("target") = 1e-8, - py::arg("modules") = default_modules) - .def("__call__", &MuCommaLambdaES::operator()) - .def("step", &MuCommaLambdaES::step) - .def("sample", &MuCommaLambdaES::sample) - .def_readwrite("d", &MuCommaLambdaES::d) - .def_readwrite("lamb", &MuCommaLambdaES::lambda) - .def_readwrite("mu", &MuCommaLambdaES::mu) - - .def_readwrite("sigma", &MuCommaLambdaES::sigma) - .def_readwrite("m", &MuCommaLambdaES::m) - - .def_readwrite("X", &MuCommaLambdaES::X) - .def_readwrite("S", &MuCommaLambdaES::S) - .def_readwrite("f", &MuCommaLambdaES::f) - - .def_readwrite("tau", &MuCommaLambdaES::tau) - .def_readwrite("tau_i", &MuCommaLambdaES::tau_i) - .def_readwrite("mu_inv", &MuCommaLambdaES::mu_inv) - - .def_readwrite("f_min", &MuCommaLambdaES::f_min) - .def_readwrite("x_min", &MuCommaLambdaES::x_min) - .def_readwrite("t", &MuCommaLambdaES::t) - .def_readwrite("e", &MuCommaLambdaES::e) - .def_readwrite("budget", &MuCommaLambdaES::budget) - .def_readwrite("target", &MuCommaLambdaES::target) - .def_readwrite("sampler", &MuCommaLambdaES::sampler) - .def_readwrite("sigma_sampler", &MuCommaLambdaES::sigma_sampler) - .def_readwrite("rejection_sampling", &MuCommaLambdaES::rejection_sampling) - .def_readwrite("corrector", &MuCommaLambdaES::corrector); + auto m = main.def_submodule("es"); + parameters::Modules default_modules; + using namespace es; + py::class_>(m, "OnePlusOneES") + .def( + py::init< + size_t, + Vector, + Float, + Float, + size_t, + Float, + parameters::Modules>(), + py::arg("d"), + py::arg("x0"), + py::arg("f0"), + py::arg("sigma0") = 1.0, + py::arg("budget") = 10'000, + py::arg("target") = 1e-8, + py::arg("modules") = default_modules) + .def("__call__", &OnePlusOneES::operator()) + .def("step", &OnePlusOneES::step) + .def("sample", &OnePlusOneES::sample) + .def_readwrite("d", &OnePlusOneES::d) + .def_readwrite("sigma", &OnePlusOneES::sigma) + .def_readwrite("decay", &OnePlusOneES::decay) + .def_readwrite("x", &OnePlusOneES::x) + .def_readwrite("f", &OnePlusOneES::f) + .def_readwrite("t", &OnePlusOneES::t) + .def_readwrite("budget", &OnePlusOneES::budget) + .def_readwrite("target", &OnePlusOneES::target) + .def_readwrite("sampler", &OnePlusOneES::sampler) + .def_readwrite("rejection_sampling", &OnePlusOneES::rejection_sampling) + .def_readwrite("corrector", &OnePlusOneES::corrector); + + py::class_>(m, "MuCommaLambdaES") + .def( + py::init< + size_t, + Vector, + Float, + size_t, + Float, + parameters::Modules>(), + py::arg("d"), + py::arg("x0"), + py::arg("sigma0") = 1.0, + py::arg("budget") = 10'000, + 
py::arg("target") = 1e-8, + py::arg("modules") = default_modules) + .def("__call__", &MuCommaLambdaES::operator()) + .def("step", &MuCommaLambdaES::step) + .def("sample", &MuCommaLambdaES::sample) + .def_readwrite("d", &MuCommaLambdaES::d) + .def_readwrite("lamb", &MuCommaLambdaES::lambda) + .def_readwrite("mu", &MuCommaLambdaES::mu) + + .def_readwrite("sigma", &MuCommaLambdaES::sigma) + .def_readwrite("m", &MuCommaLambdaES::m) + + .def_readwrite("X", &MuCommaLambdaES::X) + .def_readwrite("S", &MuCommaLambdaES::S) + .def_readwrite("f", &MuCommaLambdaES::f) + + .def_readwrite("tau", &MuCommaLambdaES::tau) + .def_readwrite("tau_i", &MuCommaLambdaES::tau_i) + .def_readwrite("mu_inv", &MuCommaLambdaES::mu_inv) + + .def_readwrite("f_min", &MuCommaLambdaES::f_min) + .def_readwrite("x_min", &MuCommaLambdaES::x_min) + .def_readwrite("t", &MuCommaLambdaES::t) + .def_readwrite("e", &MuCommaLambdaES::e) + .def_readwrite("budget", &MuCommaLambdaES::budget) + .def_readwrite("target", &MuCommaLambdaES::target) + .def_readwrite("sampler", &MuCommaLambdaES::sampler) + .def_readwrite("sigma_sampler", &MuCommaLambdaES::sigma_sampler) + .def_readwrite("rejection_sampling", &MuCommaLambdaES::rejection_sampling) + .def_readwrite("corrector", &MuCommaLambdaES::corrector); } PYBIND11_MODULE(cmaescpp, m) { - define_constants(m); - define_options(m); - define_utils(m); - define_population(m); - define_samplers(m); - define_mutation(m); - define_restart_criteria(m); - define_restart_strategy(m); - define_matrix_adaptation(m); - define_center_placement(m); - define_repelling(m); - define_parameters(m); - define_bounds(m); - define_selection(m); - define_cmaes(m); - define_es(m); + define_constants(m); + define_options(m); + define_utils(m); + define_population(m); + define_samplers(m); + define_mutation(m); + define_restart_criteria(m); + define_restart_strategy(m); + define_matrix_adaptation(m); + define_center_placement(m); + define_repelling(m); + define_parameters(m); + define_bounds(m); + define_selection(m); + define_cmaes(m); + define_es(m); } diff --git a/src/main.cpp b/src/main.cpp index 80226ef..f0722a8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,18 +9,23 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; - -static int dim = 50; +static int dim = 30; static bool rotated = false; -static size_t budget = dim * 5000; +static size_t budget = dim * 10000; + + + struct Ellipse { - size_t evals = 0; + size_t evals; Matrix R; + FunctionType function; - Ellipse(const int dim, const bool rotated = false) : - R{ rotated ? functions::random_rotation_matrix(dim, 1) : Matrix::Identity(dim, dim) } + Ellipse(const int dim, const bool rotated, const functions::ObjectiveFunction ft) : + evals(0), + R{ rotated ? 
functions::random_rotation_matrix(dim, 1) : Matrix::Identity(dim, dim) }, + function(functions::get(ft)) { } @@ -28,7 +33,7 @@ struct Ellipse { evals++; const auto x_shift = R * (x.array() - 1.).matrix(); - return functions::ellipse(x_shift); + return function(x_shift); } }; @@ -55,13 +60,13 @@ struct Timer }; -void run_modcma(parameters::MatrixAdaptationType mat_t) +void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunction fun_t) { rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = true; - m.active = false; + m.elitist = false; + m.active = true; parameters::Settings settings( dim, @@ -69,32 +74,44 @@ void run_modcma(parameters::MatrixAdaptationType mat_t) -std::numeric_limits::infinity(), std::nullopt, budget, - 2.0 + 0.1 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); Timer t; - FunctionType f = Ellipse(dim, rotated); + FunctionType f = Ellipse(dim, rotated, fun_t); while (cma.step(f)) { - if (cma.p->stats.global_best.y < 1e-8) + if (cma.p->stats.global_best.y < 1e-9) break; } - std::cout << "modcmaes: " << parameters::to_string(mat_t) << "\n" << std::defaultfloat; - std::cout << "evals: " << cma.p->stats.evaluations << std::endl; + std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; + if (m.active) + std::cout << " ACTIVE"; + + if (m.elitist) + std::cout << " ELITIST"; + + std::cout << "\nfunction: " << functions::to_string(fun_t) << " " << dim << "D"; + if (rotated) + std::cout << " (rotated)"; + std::cout << "\nevals: " << cma.p->stats.evaluations << "/" << budget << std::endl; std::cout << "iters: " << cma.p->stats.t << std::endl; - std::cout << "updates: " << cma.p->stats.n_updates << std::endl; - std::cout << "best_y: " << std::scientific << std::setprecision(3) << cma.p->stats.global_best.y << std::endl; + std::cout << "updates: " << cma.p->stats.n_updates << "\n" << std::scientific << std::setprecision(3); + std::cout << "sigma: " << cma.p->mutation->sigma << std::endl; + std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; std::cout << "solved: " << std::boolalpha << (cma.p->stats.global_best.y < 1e-8) << std::endl; } int main() { - //run_modcma(parameters::MatrixAdaptationType::NONE); - //run_modcma(parameters::MatrixAdaptationType::SEPERABLE); - //run_modcma(parameters::MatrixAdaptationType::MATRIX); - run_modcma(parameters::MatrixAdaptationType::CHOLESKY); - //run_modcma(parameters::MatrixAdaptationType::COVARIANCE); + auto ft = functions::ELLIPSE; + + run_modcma(parameters::MatrixAdaptationType::NONE, ft); + run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); + run_modcma(parameters::MatrixAdaptationType::MATRIX, ft); + run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 2a512e3..5c524ec 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -10,56 +10,46 @@ namespace matrix_adaptation } void Adaptation::adapt_evolution_paths(const Population& pop, const Weights& w, - const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) { - dm = (m - m_old) / mutation->sigma; + const auto sigma = pop.s.mean(); + dm = (m - m_old) / sigma; dz = pop.Z.leftCols(mu) * w.positive.head(mu); - adapt_evolution_paths_inner(pop, w, mutation, stats, mu, lambda); + adapt_evolution_paths_inner(pop, w, stats, mu, lambda); } void 
CovarianceAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const std::shared_ptr& mutation, const Stats& stats, const size_t mu, const size_t lambda) { const auto& expr = constants::calc_eigv ? inv_root_C * dm : dz; - - ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * expr); - const Float actual_ps_length = ps.norm() / sqrt( - 1.0 - pow(1.0 - mutation->cs, 2.0 * (stats.evaluations / lambda))); + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * expr); - const Float expected_ps_length = (1.4 + (2.0 / (dd + 1.0))) * expected_length_z; + const Float actual_ps_length = ps.norm() / sqrt( + 1.0 - pow(1.0 - w.cs, 2.0 * (stats.evaluations / lambda))); - hs = actual_ps_length < expected_ps_length; - pc = (1.0 - w.cc) * pc + (hs * sqrt(w.cc * (2.0 - w.cc) * w.mueff)) * dm; + hs = actual_ps_length < w.expected_length_ps; + pc = (1.0 - w.cc) * pc + (hs * w.sqrt_cc_mueff) * dm; } void CovarianceAdaptation::adapt_covariance_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu) { - const auto rank_one = w.c1 * pc * pc.transpose(); const auto dhs = (1 - hs) * w.cc * (2.0 - w.cc); - const auto old_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * w.positive.sum())) * C; + const auto& rank_one = w.c1 * pc * pc.transpose(); - if (m.active) - { - auto weights = w.weights.topRows(pop.Y.cols()); - C = old_c + rank_one + w.cmu * ((pop.Y.array().rowwise() * weights.array().transpose()).matrix() * pop.Y.transpose()); - } - else - { - C = old_c + rank_one + (w.cmu * ((pop.Y.leftCols(mu).array().rowwise() * w.positive.array().transpose()).matrix() * pop.Y. - leftCols(mu).transpose())); - - } + const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; + const auto& popY = m.active ? 
pop.Y : pop.Y.leftCols(mu); + const auto& old_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * weights.sum())) * C; + const auto& rank_mu = w.cmu * (popY * weights.asDiagonal() * popY.transpose()); + C = old_c + rank_one + rank_mu; C = 0.5 * (C + C.transpose().eval()); } bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { - if (!constants::calc_eigv) + if (!constants::calc_eigv) { const Eigen::LLT chol(C); if (chol.info() != Eigen::Success) @@ -74,8 +64,8 @@ namespace matrix_adaptation A = chol.matrixL(); return true; - } - + } + const Eigen::SelfAdjointEigenSolver eigen_solver(C); if (eigen_solver.info() != Eigen::Success) { @@ -117,7 +107,7 @@ namespace matrix_adaptation return perform_eigendecomposition(settings); } return true; - + } void CovarianceAdaptation::restart(const Settings& settings) @@ -141,7 +131,7 @@ namespace matrix_adaptation if (!constants::calc_eigv) { return A.triangularView().solve(yi); } - + return (B.transpose() * yi).cwiseQuotient(d); } @@ -149,43 +139,40 @@ namespace matrix_adaptation void SeperableAdaptation::adapt_evolution_paths_inner( const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, size_t mu, size_t lambda) { - ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * sqrt(w.mueff)) * dz); + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); const Float actual_ps_length = ps.norm() / sqrt( - 1.0 - pow(1.0 - mutation->cs, 2.0 * (stats.evaluations / lambda))); + 1.0 - pow(1.0 - w.cs, 2.0 * (stats.evaluations / lambda))); - const Float expected_ps_length = (1.4 + (2.0 / (dd + 1.0))) * expected_length_z; + hs = actual_ps_length < w.expected_length_ps; - hs = actual_ps_length < expected_ps_length; - pc = (1.0 - w.cc) * pc + (hs * sqrt(w.cc * (2.0 - w.cc) * w.mueff)) * dm; + pc = (1.0 - w.cc) * pc + (hs * w.sqrt_cc_mueff) * dm; } bool SeperableAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) { - + stats.last_update = stats.t; stats.n_updates++; const auto dhs = (1 - hs) * w.cc * (2.0 - w.cc); - const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * w.positive.sum())); - for (auto j = 0; j < settings.dim; j++) - { - auto rank_mu = (pop.Z.leftCols(mu).row(j).array().pow(2) * w.positive.transpose().array() * c(j)).sum(); - - if (m.active) - rank_mu += (pop.Z.rightCols(pop.Z.cols() - mu).row(j).array().pow(2) * w.negative.transpose().array() * c(j)).sum(); + const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; + const auto& popY = m.active ? 
pop.Y : pop.Y.leftCols(mu); + const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * weights.sum())); - c(j) = decay_c * c(j) + w.c1 * pow(pc(j), 2) + w.cmu * rank_mu; + for (auto j = 0; j < settings.dim; j++) + { + const auto rank_mu = (popY.row(j).array().pow(2) * weights.transpose().array()).sum(); + c(j) = (decay_c * c(j)) + (w.c1 * pow(pc(j), 2)) + (w.cmu * rank_mu); + c(j) = std::max(c(j), 1e-12); d(j) = std::sqrt(c(j)); } - - + return true; } @@ -196,7 +183,7 @@ namespace matrix_adaptation pc.setZero(); } - Vector SeperableAdaptation::compute_y(const Vector& zi) + Vector SeperableAdaptation::compute_y(const Vector& zi) { return d.array() * zi.array(); } @@ -208,16 +195,15 @@ namespace matrix_adaptation void OnePlusOneAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) { if (!stats.has_improved) return; + pc = (1.0 - w.cc) * pc; if (stats.success_ratio < max_success_ratio) - pc = ((1.0 - w.cc) * pc) + (std::sqrt(w.cc * (2.0 - w.cc)) * pop.Y.col(0)); - else - pc = (1.0 - w.cc) * pc; + pc += w.sqrt_cc_mueff * pop.Y.col(0); } bool OnePlusOneAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, @@ -236,39 +222,29 @@ namespace matrix_adaptation void MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const std::shared_ptr& mutation, - const Stats& stats, const size_t mu, const size_t lambda) + const Stats& stats, const size_t mu, const size_t lambda) { - ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * dz); + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); } bool MatrixAdaptation::adapt_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu, - const Settings& settings, parameters::Stats& stats) + const Settings& settings, parameters::Stats& stats) { + constexpr Float epsilon = 1e-10; + stats.last_update = stats.t; stats.n_updates++; - - const auto old_m = (1. - (0.5 * w.c1) - (0.5 * w.cmu)) * M; - const auto scaled_ps = (0.5 * w.c1) * (M * ps) * ps.transpose(); - const auto old_m_inv = (1. + (0.5 * w.c1) + (0.5 * w.cmu)) * M_inv; - const auto scaled_inv_ps = (0.5 * w.c1) * ps * (ps.transpose() * M); + const auto& I = Matrix::Identity(settings.dim, settings.dim); - if (m.active) - { - const auto scaled_weights = ((0.5 * w.cmu) * w.weights.topRows(pop.Y.cols())).array().transpose(); - const auto scaled_y = (pop.Y.array().rowwise() * scaled_weights).matrix(); + const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; + const auto& popZ = m.active ? 
pop.Z : pop.Z.leftCols(mu); + const auto& Z = popZ * weights.asDiagonal() * popZ.transpose(); + const auto& ZwI = (w.cmu / 2.0) * (Z - I); + const auto& ssI = (w.c1 / 2.0) * (ps * ps.transpose() - I); - M = old_m + scaled_ps + scaled_y * pop.Z.transpose(); - M_inv = old_m_inv - scaled_inv_ps - scaled_y * (pop.Z.transpose() * M_inv); - } - else - { - const auto scaled_weights = ((0.5 * w.cmu) * w.positive).array().transpose(); - const auto scaled_y = (pop.Y.leftCols(mu).array().rowwise() * scaled_weights).matrix(); - M = old_m + scaled_ps + scaled_y * pop.Z.leftCols(mu).transpose(); - M_inv = old_m_inv - scaled_inv_ps - scaled_y * (pop.Z.leftCols(mu).transpose() * M_inv); - } + M = M * (I + ssI + ZwI); + M_inv = (I - ssI - ZwI + epsilon * I) * M_inv; return true; } @@ -291,10 +267,9 @@ namespace matrix_adaptation void None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const std::shared_ptr& mutation, const - Stats& stats, const size_t mu, const size_t lambda) + const Stats& stats, const size_t mu, const size_t lambda) { - ps = (1.0 - mutation->cs) * ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * dz); + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); } Vector None::compute_y(const Vector& zi) @@ -322,8 +297,8 @@ namespace matrix_adaptation const Float gamma = Ajj2 * b + beta * aj2; A_prime(j, j) = std::sqrt(Ajj2 + (beta / b) * aj2); - - for (int k = j+1; k < d; k++) + + for (int k = j + 1; k < d; k++) { a(k) -= a(j) / A(j, j) * A(k, j); A_prime(k, j) = A_prime(j, j) / A(j, j) * A(k, j) + A_prime(j, j) * beta * a(j) / gamma * a(k); @@ -333,25 +308,27 @@ namespace matrix_adaptation return A_prime; } - void CholeskyAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const std::shared_ptr& mutation, const parameters::Stats& stats, - size_t mu, size_t lambda) + void CholeskyAdaptation::adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + size_t mu, size_t lambda + ) { - pc = (1.0 - w.cc) * pc + (std::sqrt(w.cc * (2.0 - w.cc) * w.mueff)) * dm; - ps = (1.0 - mutation->cs)* ps + (sqrt(mutation->cs * (2.0 - mutation->cs) * w.mueff) * - A.triangularView().solve(dm)); + pc = (1.0 - w.cc) * pc + (w.sqrt_cc_mueff) * dm; + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); } bool CholeskyAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) { - + stats.last_update = stats.t; - stats.n_updates++; + stats.n_updates++; A *= std::sqrt(1 - w.c1 - w.cmu); A = rank_one_update(A, w.c1, pc); - for (auto i = 0; i < mu; i++) + for (auto i = 0; i < mu; i++) A = rank_one_update(A, w.cmu * w.positive(i), pop.Y.col(i)); if (m.active) @@ -377,5 +354,4 @@ namespace matrix_adaptation return A.triangularView().solve(yi); } - } diff --git a/src/mutation.cpp b/src/mutation.cpp index 055b668..9ee9317 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -5,8 +5,8 @@ namespace mutation { - Vector ThresholdConvergence::scale(const Vector &zi, const Float diameter, const size_t budget, - const size_t evaluations) + Vector ThresholdConvergence::scale(const Vector& zi, const Float diameter, const size_t budget, + const size_t evaluations) { const Float t = init_threshold * diameter * pow(static_cast(budget - evaluations) / static_cast(budget), decay_factor); @@ -15,25 +15,25 @@ namespace mutation return zi; } - bool 
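The new MatrixAdaptation::adapt_matrix body above replaces the explicit rank-one/rank-mu accumulation with a single multiplicative update of M and a first-order update of its inverse. A NumPy transcription of that update, for reference; the function name, the epsilon default and the array shapes (Z is dim x mu, w has length mu) are illustrative:

    import numpy as np

    def adapt_matrix(M, M_inv, ps, Z, w, c1, cmu, eps=1e-10):
        dim = M.shape[0]
        I = np.eye(dim)
        ssI = 0.5 * c1 * (np.outer(ps, ps) - I)          # rank-one term from the sigma path
        ZwI = 0.5 * cmu * (Z @ np.diag(w) @ Z.T - I)     # rank-mu term from the weighted samples
        M_new = M @ (I + ssI + ZwI)
        # first-order approximation of the inverse update, regularised with eps * I
        M_inv_new = (I - ssI - ZwI + eps * I) @ M_inv
        return M_new, M_inv_new

In the active-update branch the weights include the negative recombination weights and Z spans the full population, exactly as selected by the ternaries in the C++ code.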
SequentialSelection::break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror &m) + bool SequentialSelection::break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror& m) { return (f < fopt) and (i >= seq_cutoff) and (m != parameters::Mirror::PAIRWISE or i % 2 == 0); } - void CSA::adapt(const parameters::Weights &w, std::shared_ptr adaptation, - Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) + void CSA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - sigma *= std::exp((cs / damps) * ((adaptation->ps.norm() / expected_length_z) - 1)); + sigma *= std::exp((w.cs / w.damps) * ((adaptation->ps.norm() / w.expected_length_z) - 1)); } - void CSA::mutate(FunctionType &objective, const size_t n_offspring, parameters::Parameters &p) + void CSA::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) { ss->sample(sigma, p.pop); p.bounds->n_out_of_bounds = 0; p.repelling->prepare_sampling(p); - + for (Eigen::Index i = 0; i < static_cast(n_offspring); ++i) { size_t n_rej = 0; @@ -44,8 +44,8 @@ namespace mutation p.pop.X.col(i).noalias() = p.pop.Y.col(i) * p.pop.s(i) + p.adaptation->m; p.bounds->correct(i, p); } while ( - (p.settings.modules.bound_correction == parameters::CorrectionMethod::RESAMPLE && n_rej++ < 5*p.settings.dim && p.bounds->is_out_of_bounds(p.pop.X.col(i)).any()) || p.repelling->is_rejected(p.pop.X.col(i), p)); - + (p.settings.modules.bound_correction == parameters::CorrectionMethod::RESAMPLE && n_rej++ < 5 * p.settings.dim && p.bounds->is_out_of_bounds(p.pop.X.col(i)).any()) || p.repelling->is_rejected(p.pop.X.col(i), p)); + p.pop.f(i) = objective(p.pop.X.col(i)); p.stats.evaluations++; if (sq->break_conditions(i, p.pop.f(i), p.stats.global_best.y, p.settings.modules.mirrored)) @@ -53,7 +53,7 @@ namespace mutation } } - void TPA::mutate(FunctionType &objective, const size_t n_offspring_, parameters::Parameters &p) + void TPA::mutate(FunctionType& objective, const size_t n_offspring_, parameters::Parameters& p) { CSA::mutate(objective, n_offspring_, p); @@ -63,25 +63,25 @@ namespace mutation this->rank_tpa = f_neg < f_pos ? -a_tpa : a_tpa + b_tpa; } - void TPA::adapt(const parameters::Weights &w, std::shared_ptr adaptation, - Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) + void TPA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - s = ((1.0 - cs) * s) + (cs * this->rank_tpa); + s = ((1.0 - w.cs) * s) + (w.cs * this->rank_tpa); sigma *= std::exp(s); } //! 
Assumes the vector to be already sorted
-    Float median(const Vector &x)
+    Float median(const Vector& x)
     {
         if (x.size() % 2 == 0)
             return (x(x.size() / 2) + x(x.size() / 2 - 1)) / 2.0;
         return x(x.size() / 2);
     }
 
-    void MSR::adapt(const parameters::Weights &w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
-                    Population &pop,
-                    const Population &old_pop, const parameters::Stats &stats, const size_t lamb)
+    void MSR::adapt(const parameters::Weights& w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
+        Population& pop,
+        const Population& old_pop, const parameters::Stats& stats, const size_t lamb)
     {
         const auto n = std::min(pop.n_finite(), old_pop.n_finite());
         if (n != 0)
@@ -89,18 +89,18 @@ namespace mutation
             const Float lambda = static_cast<Float>(lamb);
             const Float k = (pop.f.array() < median(old_pop.f)).cast<Float>().sum();
             const auto z = (2.0 / lambda) * (k - ((lambda + 1.0) / 2.0));
-            s = ((1.0 - cs) * s) + (cs * z);
+            s = ((1.0 - w.cs) * s) + (w.cs * z);
             sigma *= std::exp(s / (2.0 - (2.0 / adaptation->dd)));
         }
     }
 
     //! Returns the indices of the elements of query in database
-    Vector searchsorted(const Vector &query, const Vector &database)
+    Vector searchsorted(const Vector& query, const Vector& database)
     {
         Vector res(query.size());
         auto i = 0;
 
-        for (const auto &xi : query)
+        for (const auto& xi : query)
         {
             auto it = std::find(std::begin(database), std::end(database), xi);
             res(i++) = static_cast<Float>(std::distance(std::begin(database), it));
@@ -108,9 +108,9 @@ namespace mutation
         return res;
     }
 
-    void PSR::adapt(const parameters::Weights &w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
-                    Population &pop,
-                    const Population &old_pop, const parameters::Stats &stats, const size_t lambda)
+    void PSR::adapt(const parameters::Weights& w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
+        Population& pop,
+        const Population& old_pop, const parameters::Stats& stats, const size_t lambda)
     {
         const auto n = std::min(pop.n_finite(), old_pop.n_finite());
         if (n != 0)
@@ -130,23 +130,23 @@ namespace mutation
             }
 
             const auto z = delta_r / std::pow(n, 2) - success_ratio;
-            s = (1.0 - cs) * s + (cs * z);
+            s = (1.0 - w.cs) * s + (w.cs * z);
             sigma *= std::exp(s / (2.0 - (2.0 / adaptation->dd)));
         }
     }
 
-    void XNES::adapt(const parameters::Weights &w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
-                     Population &pop,
-                     const Population &old_pop, const parameters::Stats &stats, const size_t lambda)
+    void XNES::adapt(const parameters::Weights& w, std::shared_ptr<matrix_adaptation::Adaptation> adaptation,
+        Population& pop,
+        const Population& old_pop, const parameters::Stats& stats, const size_t lambda)
     {
         // const Float z = ((std::dynamic_pointer_cast(adaptation)->inv_root_C * .Y).colwise().norm().array().pow(2.) - adaptation->dd).matrix() * w.clipped();
         const Float z = ((pop.Z).colwise().norm().array().pow(2.) 
- adaptation->dd).matrix() * w.clipped(); - sigma *= std::exp((cs / std::sqrt(adaptation->dd)) * z); + sigma *= std::exp((w.cs / std::sqrt(adaptation->dd)) * z); } - void MXNES::adapt(const parameters::Weights &w, std::shared_ptr adaptation, - Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) + void MXNES::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { const auto n = std::min(pop.n_finite(), old_pop.n_finite()); if (n != 0) @@ -155,75 +155,61 @@ namespace mutation const auto mu = pop.n - lambda; const auto dz = (pop.Z.leftCols(mu).array().rowwise() * w.positive.array().transpose()).rowwise().sum().matrix(); const auto z = (w.mueff * std::pow(dz.norm(), 2)) - adaptation->dd; - sigma *= std::exp((cs / adaptation->dd) * z); + sigma *= std::exp((w.cs / adaptation->dd) * z); } } - void LPXNES::adapt(const parameters::Weights &w, std::shared_ptr adaptation, - Population &pop, - const Population &old_pop, const parameters::Stats &stats, const size_t lambda) + void LPXNES::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - const auto z = std::exp(cs * pop.s.array().log().matrix().dot(w.clipped())); - sigma = std::pow(sigma, 1.0 - cs) * z; + const auto z = std::exp(w.cs * pop.s.array().log().matrix().dot(w.clipped())); + sigma = std::pow(sigma, 1.0 - w.cs) * z; } void SR::adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - sigma *= std::exp((1 / damps) * ((stats.success_ratio - tgt_success_ratio) / (1.0 - tgt_success_ratio))); + sigma *= std::exp((1 / w.damps) * ((stats.success_ratio - tgt_success_ratio) / (1.0 - tgt_success_ratio))); } - std::shared_ptr get(const parameters::Modules &m, const size_t mu, const Float mueff, - const Float d, const Float sigma, const std::optional cs0, - const Float expected_z) + std::shared_ptr get(const parameters::Modules& m, const size_t mu, const Float d, const Float sigma) { using namespace parameters; auto tc = m.threshold_convergence - ? std::make_shared() - : std::make_shared(); + ? std::make_shared() + : std::make_shared(); auto sq = m.sequential_selection - ? std::make_shared(m.mirrored, mu) - : std::make_shared(m.mirrored, mu); + ? std::make_shared(m.mirrored, mu) + : std::make_shared(m.mirrored, mu); auto ss = (m.sample_sigma or m.ssa == StepSizeAdaptation::LPXNES) - ? std::make_shared(d) - : std::make_shared(d); - - Float cs = cs0.value_or(0.3); - Float damps = 0.0; + ? 
std::make_shared(d) + : std::make_shared(d); switch (m.ssa) { case StepSizeAdaptation::TPA: - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::MSR: - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::XNES: - cs = cs0.value_or(mueff / (2.0 * std::log(std::max(Float{2.}, d)) * sqrt(d))); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::MXNES: - cs = cs0.value_or(1.); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::LPXNES: - cs = cs0.value_or(9.0 * mueff / (10.0 * sqrt(d))); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::PSR: - cs = cs0.value_or(.9); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::SR: - cs = cs0.value_or(1.0 / 12.0); - damps = 1.0 + (d / 2.0); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); default: case StepSizeAdaptation::CSA: - cs = cs0.value_or((mueff + 2.0) / (d + mueff + 5.0)); - const Float rhs = std::sqrt((mueff - Float(1.0)) / (d + 1)) - 1; - damps = 1.0 + (2.0 * std::max(Float(0.0), rhs) + cs); - return std::make_shared(tc, sq, ss, cs, damps, sigma, expected_z); + return std::make_shared(tc, sq, ss, sigma); } } } diff --git a/src/parameters.cpp b/src/parameters.cpp index e89adb1..298347f 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -2,34 +2,33 @@ namespace parameters { - Parameters::Parameters(const Settings &settings) : successfull_adaptation(true), - lambda(settings.lambda0), - mu(settings.mu0), - settings(settings), - stats{}, - weights(settings.dim, settings.mu0, settings.lambda0, settings), - pop(settings.dim, settings.lambda0), - old_pop(settings.dim, settings.lambda0), - criteria(restart::Criteria::get(settings.modules)), - sampler(sampling::get(settings.dim, settings.modules, settings.lambda0)), - adaptation(matrix_adaptation::get(settings.modules, settings.dim, - settings.x0.value_or(Vector::Zero(settings.dim)), - sampler->expected_length())), - mutation(mutation::get(settings.modules, - settings.mu0, weights.mueff, - static_cast(settings.dim), - settings.sigma0, - settings.cs, - sampler->expected_length())), - selection(std::make_shared(settings.modules)), - restart_strategy(restart::strategy::get( - settings.modules, - static_cast(settings.lambda0), - static_cast(settings.mu0), - settings.budget)), - bounds(bounds::get(settings.modules.bound_correction, settings.lb, settings.ub)), - repelling(repelling::get(settings.modules)), - center_placement(center::get(settings.modules.center_placement)) + Parameters::Parameters(const Settings& settings) : successfull_adaptation(true), + lambda(settings.lambda0), + mu(settings.mu0), + settings(settings), + stats{}, + sampler(sampling::get(settings.dim, settings.modules, settings.lambda0)), + weights(settings.dim, settings.mu0, settings.lambda0, settings, sampler->expected_length()), + pop(settings.dim, settings.lambda0), + old_pop(settings.dim, settings.lambda0), + criteria(restart::Criteria::get(settings.modules)), + adaptation(matrix_adaptation::get(settings.modules, settings.dim, + 
settings.x0.value_or(Vector::Zero(settings.dim)), + sampler->expected_length())), + mutation(mutation::get(settings.modules, + settings.mu0, + static_cast(settings.dim), + settings.sigma0 + )), + selection(std::make_shared(settings.modules)), + restart_strategy(restart::strategy::get( + settings.modules, + static_cast(settings.lambda0), + static_cast(settings.mu0), + settings.budget)), + bounds(bounds::get(settings.modules.bound_correction, settings.lb, settings.ub)), + repelling(repelling::get(settings.modules)), + center_placement(center::get(settings.modules.center_placement)) { criteria.reset(*this); } @@ -38,7 +37,7 @@ namespace parameters { } - void Parameters::perform_restart(FunctionType &objective, const std::optional &sigma) + void Parameters::perform_restart(FunctionType& objective, const std::optional& sigma) { stats.solutions.push_back(stats.current_best); stats.evaluations++; @@ -47,16 +46,15 @@ namespace parameters stats.has_improved = false; repelling->update_archive(objective, *this); - weights = Weights(settings.dim, mu, lambda, settings); sampler->reset(settings.modules, lambda); + weights = Weights(settings.dim, mu, lambda, settings, sampler->expected_length()); pop = Population(settings.dim, lambda); old_pop = Population(settings.dim, lambda); - mutation = mutation::get(settings.modules, mu, weights.mueff, - static_cast(settings.dim), - sigma.value_or(settings.sigma0), - settings.cs, sampler->expected_length()); + mutation = mutation::get(settings.modules, mu, + static_cast(settings.dim), + sigma.value_or(settings.sigma0)); adaptation->restart(settings); (*center_placement)(*this); criteria.reset(*this); @@ -65,7 +63,7 @@ namespace parameters void Parameters::adapt() { - adaptation->adapt_evolution_paths(pop, weights, mutation, stats, mu, lambda); + adaptation->adapt_evolution_paths(pop, weights, stats, mu, lambda); mutation->adapt(weights, adaptation, pop, old_pop, stats, lambda); if (constants::clip_sigma) @@ -77,7 +75,7 @@ namespace parameters stats.t++; } - void Parameters::start(FunctionType &objective) + void Parameters::start(FunctionType& objective) { old_pop = pop; if (criteria.any()) @@ -88,12 +86,12 @@ namespace parameters } } -std::ostream &operator<<(std::ostream &os, const parameters::Stats &s) +std::ostream& operator<<(std::ostream& os, const parameters::Stats& s) { return os - << "Stats" - << " t=" << s.t - << " e=" << s.evaluations - << " best=" << s.global_best - << " improved=" << std::boolalpha << s.has_improved; + << "Stats" + << " t=" << s.t + << " e=" << s.evaluations + << " best=" << s.global_best + << " improved=" << std::boolalpha << s.has_improved; } diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index 9d9d305..8a22de7 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -205,8 +205,8 @@ namespace restart criteria.push_back(std::make_shared()); criteria.push_back(std::make_shared()); - if (modules.matrix_adaptation == parameters::MatrixAdaptationType::COVARIANCE || - modules.matrix_adaptation == parameters::MatrixAdaptationType::SEPERABLE) + //! 
TODO: make these compatible with other MA + if (modules.matrix_adaptation == parameters::MatrixAdaptationType::COVARIANCE) { criteria.push_back(std::make_shared()); criteria.push_back(std::make_shared()); diff --git a/src/weights.cpp b/src/weights.cpp index c194ab0..53b5c9f 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -1,9 +1,54 @@ #include "weights.hpp" + + namespace parameters { - Weights::Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings& settings) - : weights(lambda), positive(mu), negative(lambda - mu) + static Float get_default_cs(const StepSizeAdaptation ssa, const Float mueff, const Float d) + { + switch (ssa) + { + case StepSizeAdaptation::XNES: + return mueff / (2.0 * std::log(std::max(Float{ 2. }, d)) * sqrt(d)); + case StepSizeAdaptation::MXNES: + return 1.0; + case StepSizeAdaptation::LPXNES: + return 9.0 * mueff / (10.0 * sqrt(d)); + case StepSizeAdaptation::PSR: + return 0.9; + case StepSizeAdaptation::SR: + return 1.0 / 12.0; + case StepSizeAdaptation::CSA: + return (mueff + 2.0) / (d + mueff + 5.0); + default: + return 0.3; + } + } + + static Float get_default_damps(const StepSizeAdaptation ssa, const Float mueff, const Float d, const Float cs) + { + switch (ssa) + { + case StepSizeAdaptation::SR: + return 1.0 + (d / 2.0); + case StepSizeAdaptation::CSA: + { + const Float rhs = std::sqrt((mueff - Float(1.0)) / (d + 1)) - 1; + return 1.0 + (2.0 * std::max(Float(0.0), rhs) + cs); + } + default: + return 0.0; + } + } + + Weights::Weights( + const size_t dim, + const size_t mu, + const size_t lambda, + const Settings& settings, + const Float expected_length_z +) + : weights(lambda), positive(mu), negative(lambda - mu), expected_length_z(expected_length_z) { const Float d = static_cast(dim); switch (settings.modules.weights) @@ -51,8 +96,18 @@ namespace parameters weights << positive, negative; lazy_update_interval = 1.0 / (c1 + cmu + 1e-23) / d / 10.0; + + cs = settings.cs.value_or(get_default_cs(settings.modules.ssa, mueff, d));// + damps = get_default_damps(settings.modules.ssa, mueff, d, cs); + sqrt_cs_mueff = std::sqrt(cs * (2.0 - cs) * mueff); + sqrt_cc_mueff = std::sqrt(cc * (2.0 - cc) * mueff); + + expected_length_ps = (1.4 + (2.0 / (d + 1.0))) * expected_length_z; } + + + void Weights::weights_default(const size_t lambda) { const Float base = std::log((static_cast(lambda) + 1.) 
/ 2.0); From ca78b70aa43b4998e0f1d52fe6e37c1ad3a556ca Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Sun, 1 Jun 2025 16:45:20 +0200 Subject: [PATCH 15/74] faster rannk update for cholesky --- src/main.cpp | 8 ++++---- src/matrix_adaptation.cpp | 33 +++++---------------------------- 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index f0722a8..16c3d34 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 30; +static int dim = 50; static bool rotated = false; static size_t budget = dim * 10000; @@ -66,7 +66,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc parameters::Modules m; m.matrix_adaptation = mat_t; m.elitist = false; - m.active = true; + m.active = false; parameters::Settings settings( dim, @@ -83,8 +83,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc FunctionType f = Ellipse(dim, rotated, fun_t); while (cma.step(f)) { - if (cma.p->stats.global_best.y < 1e-9) - break; + /*if (cma.p->stats.global_best.y < 1e-9) + break;*/ } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 5c524ec..f0f8fe6 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -283,31 +283,6 @@ namespace matrix_adaptation return yi; } - - Matrix CholeskyAdaptation::rank_one_update(const Matrix& A, const Float beta, Vector a) - { - const auto d = a.size(); - Float b = 1.0; - A_prime.setZero(); - - for (int j = 0; j < d; j++) - { - const Float aj2 = std::pow(a(j), 2); - const Float Ajj2 = std::pow(A(j, j), 2); - const Float gamma = Ajj2 * b + beta * aj2; - - A_prime(j, j) = std::sqrt(Ajj2 + (beta / b) * aj2); - - for (int k = j + 1; k < d; k++) - { - a(k) -= a(j) / A(j, j) * A(k, j); - A_prime(k, j) = A_prime(j, j) / A(j, j) * A(k, j) + A_prime(j, j) * beta * a(j) / gamma * a(k); - } - b += beta * aj2 / Ajj2; - } - return A_prime; - } - void CholeskyAdaptation::adapt_evolution_paths_inner( const Population& pop, const parameters::Weights& w, @@ -327,13 +302,15 @@ namespace matrix_adaptation stats.n_updates++; A *= std::sqrt(1 - w.c1 - w.cmu); - A = rank_one_update(A, w.c1, pc); + + Eigen::internal::llt_rank_update_lower(A, pc, w.c1); for (auto i = 0; i < mu; i++) - A = rank_one_update(A, w.cmu * w.positive(i), pop.Y.col(i)); + Eigen::internal::llt_rank_update_lower(A, pop.Y.col(i), w.cmu * w.positive(i)); if (m.active) for (auto i = 0; i < pop.Y.cols() - mu; i++) - A = rank_one_update(A, w.cmu * w.negative(i), pop.Y.col(mu + i)); + Eigen::internal::llt_rank_update_lower(A, pop.Y.col(mu + i), w.cmu * w.negative(i)); + return true; } From 8d4df93b73331da24b45b3d381b535b5a39467f2 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Sun, 1 Jun 2025 22:29:39 +0200 Subject: [PATCH 16/74] started on CSMA --- include/matrix_adaptation.hpp | 13 ++++---- include/modules.hpp | 3 +- include/mutation.hpp | 56 +++++++++++++++++++++-------------- include/to_string.hpp | 7 ++++- src/main.cpp | 25 ++++++++++------ src/matrix_adaptation.cpp | 8 ++--- src/mutation.cpp | 35 +++++++++++++++------- 7 files changed, 94 insertions(+), 53 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 20f14c7..657f0a4 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -160,7 +160,10 @@ namespace matrix_adaptation 
MatrixAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), M(Matrix::Identity(dim, dim)), - M_inv(Matrix::Identity(dim, dim)) + M_inv(Matrix::Identity(dim, dim)), + ZwI(Matrix::Identity(dim, dim)), + ssI(Matrix::Identity(dim, dim)), + I(Matrix::Identity(dim, dim)) { } @@ -176,6 +179,9 @@ namespace matrix_adaptation Vector compute_y(const Vector&) override; Vector invert_y(const Vector&) override; + + private: + Matrix ZwI, ssI, I; }; struct CholeskyAdaptation final : Adaptation @@ -186,8 +192,7 @@ namespace matrix_adaptation CholeskyAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), A(Matrix::Identity(dim, dim)), - pc(Vector::Zero(dim)), - A_prime(Matrix::Zero(dim, dim)) + pc(Vector::Zero(dim)) { } @@ -209,8 +214,6 @@ namespace matrix_adaptation Matrix rank_one_update(const Matrix& A, const Float beta, Vector a); - private: - Matrix A_prime; }; diff --git a/include/modules.hpp b/include/modules.hpp index 06ed83c..555bec7 100644 --- a/include/modules.hpp +++ b/include/modules.hpp @@ -44,7 +44,8 @@ namespace parameters MXNES, LPXNES, PSR, - SR + SR, + SA, }; enum class CorrectionMethod diff --git a/include/mutation.hpp b/include/mutation.hpp index e081ba9..c5e7f28 100644 --- a/include/mutation.hpp +++ b/include/mutation.hpp @@ -65,7 +65,12 @@ namespace mutation { Float beta; - SigmaSampler(const Float d) : beta(std::log(2.0) / std::max((std::sqrt(d) * std::log(d)), Float{ 1.0 })) {} + SigmaSampler(const Float d) : beta( + std::log(2.0) / std::max((std::sqrt(d) * std::log(d)), Float{ 1.0 }) + + //1.0 / std::sqrt(2.0 * d) + + ) {} virtual void sample(const Float sigma, Population& pop) const { @@ -99,7 +104,7 @@ namespace mutation const Float sigma0) : tc(threshold_covergence), sq(sequential_selection), ss(sigma_sampler), sigma(sigma0) { } - virtual void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) = 0; + virtual void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p); virtual void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) = 0; @@ -107,21 +112,16 @@ namespace mutation struct CSA : Strategy { - CSA(const std::shared_ptr& threshold_covergence, - const std::shared_ptr& sequential_selection, - const std::shared_ptr& sigma_sampler, - const Float sigma0) : Strategy(threshold_covergence, sequential_selection, sigma_sampler, sigma0) { - } - - void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) override; + + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct TPA : CSA + struct TPA : Strategy { - using CSA::CSA; + using Strategy::Strategy; Float a_tpa = 0.5; Float b_tpa = 0.0; @@ -133,62 +133,72 @@ namespace mutation const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct MSR : CSA + struct MSR : Strategy { - using CSA::CSA; + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct PSR : CSA + struct PSR : Strategy { Float success_ratio = .25; 
Vector combined; - using CSA::CSA; + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct XNES : CSA + struct XNES : Strategy { - using CSA::CSA; + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct MXNES : CSA + struct MXNES : Strategy { - using CSA::CSA; + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct LPXNES : CSA + struct LPXNES : Strategy { - using CSA::CSA; + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; - struct SR : CSA + struct SR : Strategy { constexpr static Float tgt_success_ratio = 2.0 / 11.0; - using CSA::CSA; + using Strategy::Strategy; + + void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + }; + + + struct SA: Strategy + { + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; }; + std::shared_ptr get(const parameters::Modules& m, const size_t mu, const Float d, const Float sigma); } \ No newline at end of file diff --git a/include/to_string.hpp b/include/to_string.hpp index 277e039..a7a044e 100644 --- a/include/to_string.hpp +++ b/include/to_string.hpp @@ -63,9 +63,14 @@ namespace parameters return "MXNES"; case StepSizeAdaptation::LPXNES: return "LPXNES"; - default: + case StepSizeAdaptation::SR: + return "SR"; case StepSizeAdaptation::PSR: return "PSR"; + case StepSizeAdaptation::SA: + return "SA"; + default: + return "unknown"; } } inline std::string to_string(const CorrectionMethod &s) diff --git a/src/main.cpp b/src/main.cpp index 16c3d34..f1464d6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 50; +static int dim = 5; static bool rotated = false; static size_t budget = dim * 10000; @@ -60,13 +60,15 @@ struct Timer }; -void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunction fun_t) +void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunction fun_t, parameters::StepSizeAdaptation ssa) { rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; m.elitist = false; m.active = false; + m.ssa = ssa; + m.weights = parameters::RecombinationWeights::EQUAL; parameters::Settings settings( dim, @@ -83,11 +85,15 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc FunctionType f = Ellipse(dim, rotated, fun_t); while (cma.step(f)) { - /*if (cma.p->stats.global_best.y < 1e-9) - break;*/ + if (cma.p->stats.global_best.y < 1e-9) + break; + + //std::cout << cma.p->stats.t << " ";/* + //std::cout << cma.p->mutation->sigma << std::endl;*/ } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; + std::cout << " - " << 
parameters::to_string(ssa); if (m.active) std::cout << " ACTIVE"; @@ -108,10 +114,11 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::ELLIPSE; + auto ssa = parameters::StepSizeAdaptation::SA; - run_modcma(parameters::MatrixAdaptationType::NONE, ft); - run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); - run_modcma(parameters::MatrixAdaptationType::MATRIX, ft); - run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft); + //run_modcma(parameters::MatrixAdaptationType::NONE, ft); + //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); + //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index f0f8fe6..98bbe79 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -235,13 +235,12 @@ namespace matrix_adaptation stats.last_update = stats.t; stats.n_updates++; - const auto& I = Matrix::Identity(settings.dim, settings.dim); - const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; const auto& popZ = m.active ? pop.Z : pop.Z.leftCols(mu); const auto& Z = popZ * weights.asDiagonal() * popZ.transpose(); - const auto& ZwI = (w.cmu / 2.0) * (Z - I); - const auto& ssI = (w.c1 / 2.0) * (ps * ps.transpose() - I); + + ZwI.noalias() = (w.cmu / 2.0) * (Z - I); + ssI.noalias() = (w.c1 / 2.0) * (ps * ps.transpose() - I); M = M * (I + ssI + ZwI); M_inv = (I - ssI - ZwI + epsilon * I) * M_inv; @@ -319,6 +318,7 @@ namespace matrix_adaptation { Adaptation::restart(settings); A = Matrix::Identity(settings.dim, settings.dim); + pc.setZero(); } Vector CholeskyAdaptation::compute_y(const Vector& zi) diff --git a/src/mutation.cpp b/src/mutation.cpp index 9ee9317..25bf782 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -20,15 +20,9 @@ namespace mutation return (f < fopt) and (i >= seq_cutoff) and (m != parameters::Mirror::PAIRWISE or i % 2 == 0); } - void CSA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, - Population& pop, - const Population& old_pop, const parameters::Stats& stats, const size_t lambda) - { - sigma *= std::exp((w.cs / w.damps) * ((adaptation->ps.norm() / w.expected_length_z) - 1)); - } - void CSA::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) + void Strategy::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) { ss->sample(sigma, p.pop); p.bounds->n_out_of_bounds = 0; @@ -53,9 +47,18 @@ namespace mutation } } + void CSA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) + + { + sigma *= std::exp((w.cs / w.damps) * ((adaptation->ps.norm() / w.expected_length_z) - 1)); + } + + void TPA::mutate(FunctionType& objective, const size_t n_offspring_, parameters::Parameters& p) { - CSA::mutate(objective, n_offspring_, p); + Strategy::mutate(objective, n_offspring_, p); const auto f_pos = objective(p.adaptation->m + (p.mutation->sigma * p.adaptation->dm)); const auto f_neg = objective(p.adaptation->m + (p.mutation->sigma * -p.adaptation->dm)); @@ -163,7 +166,8 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t 
lambda) { - const auto z = std::exp(w.cs * pop.s.array().log().matrix().dot(w.clipped())); + const auto z = std::exp( + w.cs * pop.s.array().log().matrix().dot(w.clipped())); sigma = std::pow(sigma, 1.0 - w.cs) * z; } @@ -174,6 +178,15 @@ namespace mutation sigma *= std::exp((1 / w.damps) * ((stats.success_ratio - tgt_success_ratio) / (1.0 - tgt_success_ratio))); } + void SA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, + Population& pop, + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) + { + sigma = pop.s.topRows(w.positive.rows()).transpose() * w.positive; + } + + + std::shared_ptr get(const parameters::Modules& m, const size_t mu, const Float d, const Float sigma) { @@ -187,7 +200,7 @@ namespace mutation ? std::make_shared(m.mirrored, mu) : std::make_shared(m.mirrored, mu); - auto ss = (m.sample_sigma or m.ssa == StepSizeAdaptation::LPXNES) + auto ss = (m.sample_sigma or m.ssa == StepSizeAdaptation::LPXNES or m.ssa == StepSizeAdaptation::SA) ? std::make_shared(d) : std::make_shared(d); @@ -207,6 +220,8 @@ namespace mutation return std::make_shared(tc, sq, ss, sigma); case StepSizeAdaptation::SR: return std::make_shared(tc, sq, ss, sigma); + case StepSizeAdaptation::SA: + return std::make_shared(tc, sq, ss, sigma); default: case StepSizeAdaptation::CSA: return std::make_shared(tc, sq, ss, sigma); From cc4efd1ca7206ab3bda4aebd15df6ffd05a218f3 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Mon, 2 Jun 2025 14:39:39 +0200 Subject: [PATCH 17/74] added CSMA --- include/matrix_adaptation.hpp | 30 +++++++++- include/modules.hpp | 1 + include/mutation.hpp | 30 ++++------ include/population.hpp | 3 +- include/sampling.hpp | 1 - include/settings.hpp | 5 ++ include/to_string.hpp | 5 +- include/weights.hpp | 1 + src/main.cpp | 15 ++--- src/matrix_adaptation.cpp | 101 ++++++++++++++++++++++++++++------ src/mutation.cpp | 13 +++-- src/population.cpp | 4 ++ src/selection.cpp | 5 +- src/weights.cpp | 9 ++- 14 files changed, 162 insertions(+), 61 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 657f0a4..da75067 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -211,9 +211,34 @@ namespace matrix_adaptation Vector compute_y(const Vector&) override; Vector invert_y(const Vector&) override; + }; + + struct SelfAdaptation final : Adaptation + { + Matrix A; + Matrix C; - Matrix rank_one_update(const Matrix& A, const Float beta, Vector a); + SelfAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) + : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), + A(Matrix::Identity(dim, dim)), + C(Matrix::Identity(dim, dim)) + {} + void adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + size_t mu, size_t lambda + ) override; + + bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) override; + + void restart(const parameters::Settings& settings) override; + + Vector compute_y(const Vector&) override; + + Vector invert_y(const Vector&) override; }; @@ -232,6 +257,9 @@ namespace matrix_adaptation return std::make_shared(dim, x0, expected_z); case MatrixAdaptationType::CHOLESKY: return std::make_shared(dim, x0, expected_z); + + case MatrixAdaptationType::CMSA: + return std::make_shared(dim, x0, expected_z); default: case MatrixAdaptationType::COVARIANCE: return 
std::make_shared(dim, x0, expected_z); diff --git a/include/modules.hpp b/include/modules.hpp index 555bec7..57dd3af 100644 --- a/include/modules.hpp +++ b/include/modules.hpp @@ -76,6 +76,7 @@ namespace parameters SEPERABLE, ONEPLUSONE, CHOLESKY, + CMSA }; enum class CenterPlacement diff --git a/include/mutation.hpp b/include/mutation.hpp index c5e7f28..fbb6241 100644 --- a/include/mutation.hpp +++ b/include/mutation.hpp @@ -48,14 +48,12 @@ namespace mutation public: SequentialSelection(const parameters::Mirror& m, const size_t mu, const Float seq_cutoff_factor = 1.0) : seq_cutoff_factor(m == parameters::Mirror::PAIRWISE ? std::max(Float{ 2. }, seq_cutoff_factor) : seq_cutoff_factor), seq_cutoff(static_cast(mu* seq_cutoff_factor)) - { - } + {} virtual bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror& m); }; struct NoSequentialSelection : SequentialSelection { - using SequentialSelection::SequentialSelection; bool break_conditions(const size_t i, const Float f, Float fopt, const parameters::Mirror& m) override { return false; } @@ -63,19 +61,15 @@ namespace mutation struct SigmaSampler { - Float beta; + sampling::GaussianTransformer sampler; - SigmaSampler(const Float d) : beta( - std::log(2.0) / std::max((std::sqrt(d) * std::log(d)), Float{ 1.0 }) + SigmaSampler(const Float d) : sampler{ std::make_shared(1) } + {} - //1.0 / std::sqrt(2.0 * d) - - ) {} - - virtual void sample(const Float sigma, Population& pop) const + virtual void sample(const Float sigma, Population& pop, const Float tau) { - pop.s = sampling::Random>(pop.s.size(), - std::lognormal_distribution<>(std::log(sigma), beta))(); + sampler.sampler->d = pop.s.rows(); + pop.s.noalias() = (sigma * (tau * sampler().array()).exp()).matrix().eval(); } }; @@ -83,7 +77,7 @@ namespace mutation { using SigmaSampler::SigmaSampler; - void sample(const Float sigma, Population& pop) const override + void sample(const Float sigma, Population& pop, const Float tau) override { pop.s.setConstant(sigma); } @@ -101,8 +95,8 @@ namespace mutation const std::shared_ptr& threshold_covergence, const std::shared_ptr& sequential_selection, const std::shared_ptr& sigma_sampler, - const Float sigma0) : tc(threshold_covergence), sq(sequential_selection), ss(sigma_sampler), sigma(sigma0) { - } + const Float sigma0) : tc(threshold_covergence), sq(sequential_selection), ss(sigma_sampler), sigma(sigma0) + {} virtual void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p); @@ -112,7 +106,7 @@ namespace mutation struct CSA : Strategy { - + using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, @@ -189,7 +183,7 @@ namespace mutation }; - struct SA: Strategy + struct SA : Strategy { using Strategy::Strategy; diff --git a/include/population.hpp b/include/population.hpp index c7a3c08..4863ad4 100644 --- a/include/population.hpp +++ b/include/population.hpp @@ -9,12 +9,13 @@ struct Population Matrix Y; Vector f; Vector s; + Vector t; size_t d; size_t n; Population(const size_t d, const size_t n) - : X(d, n), Z(d, n), Y(d, n), f(Vector::Constant(n, std::numeric_limits::infinity())), s(n), d(d), n(n) {} + : X(d, n), Z(d, n), Y(d, n), f(Vector::Constant(n, std::numeric_limits::infinity())), s(n), d(d), n(n), t(n) {} Population(const Matrix &X, const Matrix &Z, const Matrix &Y, const Vector &f, const Vector &s) : X(X), Z(Z), Y(Y), f(f), s(s), d(X.rows()), n(X.cols()) {} diff --git a/include/sampling.hpp b/include/sampling.hpp index 
a3e2444..7823011 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -236,7 +236,6 @@ namespace sampling return (*sampler)(); } - protected: std::shared_ptr sampler; }; diff --git a/include/settings.hpp b/include/settings.hpp index b63f4d2..50050a1 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -69,6 +69,11 @@ namespace parameters mu0 = lambda0 / 2; } + if(modules.ssa == StepSizeAdaptation::SA) + { + mu0 = std::min(lambda0 / 4, mu0); + } + if (lambda0 == 1) { mu0 = 1; diff --git a/include/to_string.hpp b/include/to_string.hpp index a7a044e..e45667f 100644 --- a/include/to_string.hpp +++ b/include/to_string.hpp @@ -124,9 +124,12 @@ namespace parameters return "NONE"; case MatrixAdaptationType::SEPERABLE: return "SEPERABLE"; - default: case MatrixAdaptationType::COVARIANCE: return "COVARIANCE"; + case MatrixAdaptationType::CMSA: + return "CMSA"; + default: + return "unkown"; } } diff --git a/include/weights.hpp b/include/weights.hpp index c770b35..5003123 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -19,6 +19,7 @@ namespace parameters Float sigma_path_scale; Float expected_length_z; Float expected_length_ps; + Float beta; Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings, const Float expected_length_z); diff --git a/src/main.cpp b/src/main.cpp index f1464d6..b834837 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,13 +9,12 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 5; -static bool rotated = false; +static int dim = 40; +static bool rotated = true; static size_t budget = dim * 10000; - struct Ellipse { size_t evals; @@ -68,7 +67,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc m.elitist = false; m.active = false; m.ssa = ssa; - m.weights = parameters::RecombinationWeights::EQUAL; + //m.weights = parameters::RecombinationWeights::EQUAL; parameters::Settings settings( dim, @@ -87,9 +86,6 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc { if (cma.p->stats.global_best.y < 1e-9) break; - - //std::cout << cma.p->stats.t << " ";/* - //std::cout << cma.p->mutation->sigma << std::endl;*/ } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; @@ -114,11 +110,12 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::ELLIPSE; - auto ssa = parameters::StepSizeAdaptation::SA; + auto ssa = parameters::StepSizeAdaptation::LPXNES; - //run_modcma(parameters::MatrixAdaptationType::NONE, ft); + //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); + //run_modcma(parameters::MatrixAdaptationType::CMSA, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 98bbe79..90fa717 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -4,6 +4,21 @@ namespace matrix_adaptation { using namespace parameters; + + static Matrix cholesky_decomposition(const Matrix& C) + { + const Eigen::LLT chol(C); + if(chol.info() != Eigen::Success) + { + std::cout << chol.info(); + + + //raise std::exception(chol.info()); + assert(false); + } + return chol.matrixL(); + } + Vector 
Adaptation::invert_x(const Vector& xi, const Float sigma) { return (xi - m) / sigma; @@ -49,12 +64,12 @@ namespace matrix_adaptation bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { - if (!constants::calc_eigv) + if(!constants::calc_eigv) { const Eigen::LLT chol(C); - if (chol.info() != Eigen::Success) + if(chol.info() != Eigen::Success) { - if (settings.verbose) + if(settings.verbose) { std::cout << "Cholesky solver failed, we need to restart reason:" << chol.info() << '\n'; @@ -67,9 +82,9 @@ namespace matrix_adaptation } const Eigen::SelfAdjointEigenSolver eigen_solver(C); - if (eigen_solver.info() != Eigen::Success) + if(eigen_solver.info() != Eigen::Success) { - if (settings.verbose) + if(settings.verbose) { std::cout << "Eigenvalue solver failed, we need to restart reason:" << eigen_solver.info() << '\n'; @@ -79,9 +94,9 @@ namespace matrix_adaptation d = eigen_solver.eigenvalues(); B = eigen_solver.eigenvectors(); - if (d.minCoeff() < 0.0) + if(d.minCoeff() < 0.0) { - if (settings.verbose) + if(settings.verbose) { std::cout << "Negative eigenvalues after decomposition, we need to restart.\n"; } @@ -99,7 +114,7 @@ namespace matrix_adaptation const Settings& settings, parameters::Stats& stats) { - if (static_cast(stats.t) >= static_cast(stats.last_update) + w.lazy_update_interval) + if(static_cast(stats.t) >= static_cast(stats.last_update) + w.lazy_update_interval) { stats.last_update = stats.t; stats.n_updates++; @@ -128,7 +143,8 @@ namespace matrix_adaptation Vector CovarianceAdaptation::invert_y(const Vector& yi) { - if (!constants::calc_eigv) { + if(!constants::calc_eigv) + { return A.triangularView().solve(yi); } @@ -165,7 +181,7 @@ namespace matrix_adaptation const auto& popY = m.active ? pop.Y : pop.Y.leftCols(mu); const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * weights.sum())); - for (auto j = 0; j < settings.dim; j++) + for(auto j = 0; j < settings.dim; j++) { const auto rank_mu = (popY.row(j).array().pow(2) * weights.transpose().array()).sum(); c(j) = (decay_c * c(j)) + (w.c1 * pow(pc(j), 2)) + (w.cmu * rank_mu); @@ -176,7 +192,8 @@ namespace matrix_adaptation return true; } - void SeperableAdaptation::restart(const parameters::Settings& settings) { + void SeperableAdaptation::restart(const parameters::Settings& settings) + { Adaptation::restart(settings); c.setOnes(); d.setOnes(); @@ -198,18 +215,18 @@ namespace matrix_adaptation const parameters::Stats& stats, size_t mu, size_t lambda) { - if (!stats.has_improved) + if(!stats.has_improved) return; pc = (1.0 - w.cc) * pc; - if (stats.success_ratio < max_success_ratio) + if(stats.success_ratio < max_success_ratio) pc += w.sqrt_cc_mueff * pop.Y.col(0); } bool OnePlusOneAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) { - if (!stats.has_improved) + if(!stats.has_improved) { return true; } @@ -301,13 +318,13 @@ namespace matrix_adaptation stats.n_updates++; A *= std::sqrt(1 - w.c1 - w.cmu); - + Eigen::internal::llt_rank_update_lower(A, pc, w.c1); - for (auto i = 0; i < mu; i++) + for(auto i = 0; i < mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(i), w.cmu * w.positive(i)); - if (m.active) - for (auto i = 0; i < pop.Y.cols() - mu; i++) + if(m.active) + for(auto i = 0; i < pop.Y.cols() - mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(mu + i), w.cmu * w.negative(i)); @@ -331,4 +348,52 @@ namespace matrix_adaptation return 
A.triangularView().solve(yi); } + void SelfAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) + { + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); + } + + bool SelfAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) + { + stats.last_update = stats.t; + stats.n_updates++; + + const Float tc = 1.0 + (dd * (dd + 1)) / (2.0 * w.mueff); + const Float tc_inv = 1.0 / tc; + + const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; + const auto& popY = m.active ? pop.Y : pop.Y.leftCols(mu); + const auto& Y = popY * weights.asDiagonal() * popY.transpose(); + + C = (1.0 - tc_inv) * C + (tc_inv * Y); + C = 0.5 * (C + C.transpose().eval()); + + const Eigen::LLT chol(C); + if(chol.info() != Eigen::Success) + { + if(settings.verbose) + std::cout << "t: " << stats.t << "Cholesky solver failed, we need to restart reason:" + << chol.info() << '\n'; + return false; + } + A = chol.matrixL(); + + return true; + } + + void SelfAdaptation::restart(const parameters::Settings& settings) + { + A = Matrix::Identity(settings.dim, settings.dim); + } + + Vector SelfAdaptation::compute_y(const Vector& zi) + { + return A * zi; + } + + Vector SelfAdaptation::invert_y(const Vector& yi) + { + return A.triangularView().solve(yi); + } + } diff --git a/src/mutation.cpp b/src/mutation.cpp index 25bf782..24e1f99 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -24,7 +24,7 @@ namespace mutation void Strategy::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) { - ss->sample(sigma, p.pop); + ss->sample(sigma, p.pop, p.weights.beta); p.bounds->n_out_of_bounds = 0; p.repelling->prepare_sampling(p); @@ -33,6 +33,7 @@ namespace mutation size_t n_rej = 0; do { + p.pop.t(i) = p.stats.t; p.pop.Z.col(i).noalias() = p.mutation->tc->scale((*p.sampler)(), p.bounds->diameter, p.settings.budget, p.stats.evaluations); p.pop.Y.col(i).noalias() = p.adaptation->compute_y(p.pop.Z.col(i)); p.pop.X.col(i).noalias() = p.pop.Y.col(i) * p.pop.s(i) + p.adaptation->m; @@ -143,6 +144,8 @@ namespace mutation const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { // const Float z = ((std::dynamic_pointer_cast(adaptation)->inv_root_C * .Y).colwise().norm().array().pow(2.) - adaptation->dd).matrix() * w.clipped(); + + const Float z = ((pop.Z).colwise().norm().array().pow(2.) 
- adaptation->dd).matrix() * w.clipped(); sigma *= std::exp((w.cs / std::sqrt(adaptation->dd)) * z); } @@ -166,8 +169,7 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - const auto z = std::exp( - w.cs * pop.s.array().log().matrix().dot(w.clipped())); + const auto z = std::exp(w.cs * pop.s.array().log().matrix().dot(w.clipped())); sigma = std::pow(sigma, 1.0 - w.cs) * z; } @@ -182,12 +184,11 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - sigma = pop.s.topRows(w.positive.rows()).transpose() * w.positive; + const auto& sigma_l = pop.s.topRows(w.positive.rows()); + sigma = std::exp((w.positive.array() * sigma_l.array().log()).sum()); } - - std::shared_ptr get(const parameters::Modules& m, const size_t mu, const Float d, const Float sigma) { using namespace parameters; diff --git a/src/population.cpp b/src/population.cpp index 0eae0d0..6475a62 100644 --- a/src/population.cpp +++ b/src/population.cpp @@ -8,6 +8,7 @@ void Population::sort() Y = Y(Eigen::all, idx).eval(); f = f(idx).eval(); s = s(idx).eval(); + t = t(idx).eval(); } Population& Population::operator+=(const Population& other) @@ -17,6 +18,7 @@ Population& Population::operator+=(const Population& other) utils::hstack(Z, other.Z); utils::concat(f, other.f); utils::concat(s, other.s); + utils::concat(t, other.t); n += other.n; return *this; } @@ -29,6 +31,7 @@ void Population::resize_cols(const size_t size) Z.conservativeResize(d, n); f.conservativeResize(n); s.conservativeResize(n); + t.conservativeResize(n); } @@ -39,6 +42,7 @@ void Population::keep_only(const std::vector& idx) Y = Y(Eigen::all, idx).eval(); f = f(idx).eval(); s = s(idx).eval(); + t = t(idx).eval(); n = idx.size(); } diff --git a/src/selection.cpp b/src/selection.cpp index 7071cb9..531b3f5 100644 --- a/src/selection.cpp +++ b/src/selection.cpp @@ -23,7 +23,6 @@ namespace selection p.stats.current_avg = p.pop.f.array().mean(); p.stats.update_best(p.pop.X(Eigen::all, 0), p.pop.f(0)); - } void Pairwise::operator()(parameters::Parameters& p) const @@ -43,8 +42,8 @@ namespace selection p.old_pop.resize_cols(k); for (Eigen::Index i = 0; i < static_cast(p.old_pop.n); i++) { - p.old_pop.Y.col(i) = p.adaptation->invert_x(p.old_pop.X.col(i), p.old_pop.s(i)); - p.old_pop.Z.col(i) = p.adaptation->invert_y(p.old_pop.Y.col(i)); + p.old_pop.Y.col(i).noalias() = p.adaptation->invert_x(p.old_pop.X.col(i), p.old_pop.s(i)); + p.old_pop.Z.col(i).noalias() = p.adaptation->invert_y(p.old_pop.Y.col(i)); } p.pop += p.old_pop; } diff --git a/src/weights.cpp b/src/weights.cpp index 53b5c9f..8d81c6a 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -13,7 +13,7 @@ namespace parameters case StepSizeAdaptation::MXNES: return 1.0; case StepSizeAdaptation::LPXNES: - return 9.0 * mueff / (10.0 * sqrt(d)); + return (9.0 * mueff) / (10.0 * sqrt(d)); case StepSizeAdaptation::PSR: return 0.9; case StepSizeAdaptation::SR: @@ -103,9 +103,12 @@ namespace parameters sqrt_cc_mueff = std::sqrt(cc * (2.0 - cc) * mueff); expected_length_ps = (1.4 + (2.0 / (d + 1.0))) * expected_length_z; - } - + beta = 1.0 / std::sqrt(2.0 * mueff) ? 
settings.modules.ssa != StepSizeAdaptation::LPXNES : + std::log(2.0) / (std::sqrt(d) * std::log(d)) + ; + + } void Weights::weights_default(const size_t lambda) From 729dbb871b0848bba47731775fec8dde0c81f7ea Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Mon, 2 Jun 2025 18:25:50 +0200 Subject: [PATCH 18/74] added eigen unsupported, added natural gradient --- CMakeLists.txt | 2 +- external/unsupported/CMakeLists.txt | 11 + external/unsupported/Eigen/AdolcForward | 159 ++ external/unsupported/Eigen/AlignedVector3 | 234 ++ external/unsupported/Eigen/ArpackSupport | 30 + external/unsupported/Eigen/AutoDiff | 46 + external/unsupported/Eigen/BVH | 95 + external/unsupported/Eigen/CMakeLists.txt | 32 + .../unsupported/Eigen/CXX11/CMakeLists.txt | 8 + external/unsupported/Eigen/CXX11/Tensor | 137 ++ .../unsupported/Eigen/CXX11/TensorSymmetry | 42 + external/unsupported/Eigen/CXX11/ThreadPool | 74 + .../Eigen/CXX11/src/Tensor/README.md | 1815 +++++++++++++++ .../Eigen/CXX11/src/Tensor/Tensor.h | 554 +++++ .../Eigen/CXX11/src/Tensor/TensorArgMax.h | 329 +++ .../Eigen/CXX11/src/Tensor/TensorAssign.h | 247 ++ .../Eigen/CXX11/src/Tensor/TensorBase.h | 1176 ++++++++++ .../Eigen/CXX11/src/Tensor/TensorBlock.h | 1559 +++++++++++++ .../CXX11/src/Tensor/TensorBroadcasting.h | 1093 +++++++++ .../Eigen/CXX11/src/Tensor/TensorChipping.h | 518 +++++ .../CXX11/src/Tensor/TensorConcatenation.h | 377 +++ .../CXX11/src/Tensor/TensorContraction.h | 1023 +++++++++ .../src/Tensor/TensorContractionBlocking.h | 73 + .../CXX11/src/Tensor/TensorContractionCuda.h | 6 + .../CXX11/src/Tensor/TensorContractionGpu.h | 1413 ++++++++++++ .../src/Tensor/TensorContractionMapper.h | 575 +++++ .../CXX11/src/Tensor/TensorContractionSycl.h | 1650 +++++++++++++ .../src/Tensor/TensorContractionThreadPool.h | 1679 ++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorConversion.h | 456 ++++ .../CXX11/src/Tensor/TensorConvolution.h | 1132 +++++++++ .../CXX11/src/Tensor/TensorConvolutionSycl.h | 544 +++++ .../Eigen/CXX11/src/Tensor/TensorCostModel.h | 214 ++ .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 347 +++ .../Eigen/CXX11/src/Tensor/TensorDevice.h | 137 ++ .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 6 + .../CXX11/src/Tensor/TensorDeviceDefault.h | 104 + .../Eigen/CXX11/src/Tensor/TensorDeviceGpu.h | 389 ++++ .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 1048 +++++++++ .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 409 ++++ .../CXX11/src/Tensor/TensorDimensionList.h | 236 ++ .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 490 ++++ .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 236 ++ .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 983 ++++++++ .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 703 ++++++ .../Eigen/CXX11/src/Tensor/TensorExpr.h | 388 ++++ .../Eigen/CXX11/src/Tensor/TensorFFT.h | 669 ++++++ .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 379 +++ .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 237 ++ .../src/Tensor/TensorForwardDeclarations.h | 191 ++ .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 488 ++++ .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 302 +++ .../CXX11/src/Tensor/TensorGlobalFunctions.h | 33 + .../src/Tensor/TensorGpuHipCudaDefines.h | 99 + .../src/Tensor/TensorGpuHipCudaUndefines.h | 44 + .../Eigen/CXX11/src/Tensor/TensorIO.h | 79 + .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 603 +++++ .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 738 ++++++ .../Eigen/CXX11/src/Tensor/TensorInflation.h | 247 ++ .../CXX11/src/Tensor/TensorInitializer.h | 82 + .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 263 +++ 
.../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 216 ++ .../Eigen/CXX11/src/Tensor/TensorMacros.h | 98 + .../Eigen/CXX11/src/Tensor/TensorMap.h | 327 +++ .../Eigen/CXX11/src/Tensor/TensorMeta.h | 311 +++ .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 1102 +++++++++ .../Eigen/CXX11/src/Tensor/TensorPadding.h | 708 ++++++ .../Eigen/CXX11/src/Tensor/TensorPatch.h | 291 +++ .../Eigen/CXX11/src/Tensor/TensorRandom.h | 322 +++ .../Eigen/CXX11/src/Tensor/TensorReduction.h | 998 ++++++++ .../CXX11/src/Tensor/TensorReductionCuda.h | 6 + .../CXX11/src/Tensor/TensorReductionGpu.h | 966 ++++++++ .../CXX11/src/Tensor/TensorReductionSycl.h | 582 +++++ .../Eigen/CXX11/src/Tensor/TensorRef.h | 454 ++++ .../Eigen/CXX11/src/Tensor/TensorReverse.h | 465 ++++ .../Eigen/CXX11/src/Tensor/TensorScan.h | 528 +++++ .../Eigen/CXX11/src/Tensor/TensorScanSycl.h | 513 +++++ .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 471 ++++ .../Eigen/CXX11/src/Tensor/TensorStorage.h | 161 ++ .../Eigen/CXX11/src/Tensor/TensorStriding.h | 346 +++ .../Eigen/CXX11/src/Tensor/TensorTrace.h | 303 +++ .../Eigen/CXX11/src/Tensor/TensorTraits.h | 264 +++ .../Eigen/CXX11/src/Tensor/TensorUInt128.h | 249 ++ .../CXX11/src/Tensor/TensorVolumePatch.h | 629 +++++ .../src/TensorSymmetry/DynamicSymmetry.h | 293 +++ .../CXX11/src/TensorSymmetry/StaticSymmetry.h | 236 ++ .../Eigen/CXX11/src/TensorSymmetry/Symmetry.h | 338 +++ .../TensorSymmetry/util/TemplateGroupTheory.h | 669 ++++++ .../Eigen/CXX11/src/ThreadPool/Barrier.h | 67 + .../Eigen/CXX11/src/ThreadPool/EventCount.h | 249 ++ .../src/ThreadPool/NonBlockingThreadPool.h | 486 ++++ .../Eigen/CXX11/src/ThreadPool/RunQueue.h | 236 ++ .../Eigen/CXX11/src/ThreadPool/ThreadCancel.h | 23 + .../CXX11/src/ThreadPool/ThreadEnvironment.h | 40 + .../Eigen/CXX11/src/ThreadPool/ThreadLocal.h | 301 +++ .../src/ThreadPool/ThreadPoolInterface.h | 48 + .../Eigen/CXX11/src/ThreadPool/ThreadYield.h | 20 + .../Eigen/CXX11/src/util/CXX11Meta.h | 537 +++++ .../Eigen/CXX11/src/util/CXX11Workarounds.h | 88 + .../Eigen/CXX11/src/util/EmulateArray.h | 261 +++ .../Eigen/CXX11/src/util/MaxSizeVector.h | 158 ++ external/unsupported/Eigen/EulerAngles | 43 + external/unsupported/Eigen/FFT | 419 ++++ external/unsupported/Eigen/IterativeSolvers | 51 + external/unsupported/Eigen/KroneckerProduct | 36 + external/unsupported/Eigen/LevenbergMarquardt | 49 + external/unsupported/Eigen/MPRealSupport | 213 ++ external/unsupported/Eigen/MatrixFunctions | 504 ++++ external/unsupported/Eigen/MoreVectorization | 24 + .../unsupported/Eigen/NonLinearOptimization | 140 ++ external/unsupported/Eigen/NumericalDiff | 56 + external/unsupported/Eigen/OpenGLSupport | 322 +++ external/unsupported/Eigen/Polynomials | 137 ++ external/unsupported/Eigen/Skyline | 39 + external/unsupported/Eigen/SparseExtra | 54 + external/unsupported/Eigen/SpecialFunctions | 103 + external/unsupported/Eigen/Splines | 35 + .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 108 + .../Eigen/src/AutoDiff/AutoDiffScalar.h | 730 ++++++ .../Eigen/src/AutoDiff/AutoDiffVector.h | 220 ++ .../unsupported/Eigen/src/BVH/BVAlgorithms.h | 293 +++ external/unsupported/Eigen/src/BVH/KdBVH.h | 223 ++ .../ArpackSelfAdjointEigenSolver.h | 790 +++++++ .../Eigen/src/EulerAngles/CMakeLists.txt | 6 + .../Eigen/src/EulerAngles/EulerAngles.h | 355 +++ .../Eigen/src/EulerAngles/EulerSystem.h | 305 +++ .../unsupported/Eigen/src/FFT/ei_fftw_impl.h | 261 +++ .../Eigen/src/FFT/ei_kissfft_impl.h | 449 ++++ .../IterativeSolvers/ConstrainedConjGrad.h | 187 ++ .../Eigen/src/IterativeSolvers/DGMRES.h | 511 ++++ 
.../Eigen/src/IterativeSolvers/GMRES.h | 335 +++ .../Eigen/src/IterativeSolvers/IDRS.h | 436 ++++ .../Eigen/src/IterativeSolvers/IncompleteLU.h | 90 + .../IterativeSolvers/IterationController.h | 154 ++ .../Eigen/src/IterativeSolvers/MINRES.h | 267 +++ .../Eigen/src/IterativeSolvers/Scaling.h | 193 ++ .../KroneckerProduct/KroneckerTensorProduct.h | 305 +++ .../LevenbergMarquardt/CopyrightMINPACK.txt | 52 + .../Eigen/src/LevenbergMarquardt/LMcovar.h | 84 + .../Eigen/src/LevenbergMarquardt/LMonestep.h | 202 ++ .../Eigen/src/LevenbergMarquardt/LMpar.h | 160 ++ .../Eigen/src/LevenbergMarquardt/LMqrsolv.h | 188 ++ .../LevenbergMarquardt/LevenbergMarquardt.h | 396 ++++ .../src/MatrixFunctions/MatrixExponential.h | 441 ++++ .../src/MatrixFunctions/MatrixFunction.h | 569 +++++ .../src/MatrixFunctions/MatrixLogarithm.h | 373 +++ .../Eigen/src/MatrixFunctions/MatrixPower.h | 705 ++++++ .../src/MatrixFunctions/MatrixSquareRoot.h | 368 +++ .../Eigen/src/MatrixFunctions/StemFunction.h | 117 + .../src/MoreVectorization/MathFunctions.h | 95 + .../HybridNonLinearSolver.h | 601 +++++ .../LevenbergMarquardt.h | 657 ++++++ .../Eigen/src/NonLinearOptimization/chkder.h | 66 + .../Eigen/src/NonLinearOptimization/covar.h | 70 + .../Eigen/src/NonLinearOptimization/dogleg.h | 107 + .../Eigen/src/NonLinearOptimization/fdjac1.h | 79 + .../Eigen/src/NonLinearOptimization/lmpar.h | 298 +++ .../Eigen/src/NonLinearOptimization/qrsolv.h | 91 + .../Eigen/src/NonLinearOptimization/r1mpyq.h | 30 + .../Eigen/src/NonLinearOptimization/r1updt.h | 99 + .../Eigen/src/NonLinearOptimization/rwupdt.h | 49 + .../Eigen/src/NumericalDiff/NumericalDiff.h | 130 ++ .../Eigen/src/Polynomials/Companion.h | 280 +++ .../Eigen/src/Polynomials/PolynomialSolver.h | 428 ++++ .../Eigen/src/Polynomials/PolynomialUtils.h | 143 ++ .../Eigen/src/Skyline/SkylineInplaceLU.h | 352 +++ .../Eigen/src/Skyline/SkylineMatrix.h | 862 +++++++ .../Eigen/src/Skyline/SkylineMatrixBase.h | 212 ++ .../Eigen/src/Skyline/SkylineProduct.h | 295 +++ .../Eigen/src/Skyline/SkylineStorage.h | 259 +++ .../Eigen/src/Skyline/SkylineUtil.h | 89 + .../SparseExtra/BlockOfDynamicSparseMatrix.h | 122 + .../Eigen/src/SparseExtra/BlockSparseMatrix.h | 1079 +++++++++ .../src/SparseExtra/DynamicSparseMatrix.h | 404 ++++ .../Eigen/src/SparseExtra/MarketIO.h | 282 +++ .../src/SparseExtra/MatrixMarketIterator.h | 247 ++ .../Eigen/src/SparseExtra/RandomSetter.h | 349 +++ .../BesselFunctionsArrayAPI.h | 286 +++ .../BesselFunctionsBFloat16.h | 68 + .../BesselFunctionsFunctors.h | 357 +++ .../SpecialFunctions/BesselFunctionsHalf.h | 66 + .../SpecialFunctions/BesselFunctionsImpl.h | 1959 ++++++++++++++++ .../BesselFunctionsPacketMath.h | 118 + .../SpecialFunctions/HipVectorCompatibility.h | 67 + .../SpecialFunctionsArrayAPI.h | 167 ++ .../SpecialFunctionsBFloat16.h | 58 + .../SpecialFunctionsFunctors.h | 330 +++ .../SpecialFunctions/SpecialFunctionsHalf.h | 58 + .../SpecialFunctions/SpecialFunctionsImpl.h | 2045 +++++++++++++++++ .../SpecialFunctionsPacketMath.h | 79 + .../arch/AVX/BesselFunctions.h | 46 + .../arch/AVX/SpecialFunctions.h | 16 + .../arch/AVX512/BesselFunctions.h | 46 + .../arch/AVX512/SpecialFunctions.h | 16 + .../arch/GPU/SpecialFunctions.h | 369 +++ .../arch/NEON/BesselFunctions.h | 54 + .../arch/NEON/SpecialFunctions.h | 34 + .../unsupported/Eigen/src/Splines/Spline.h | 507 ++++ .../Eigen/src/Splines/SplineFitting.h | 431 ++++ .../unsupported/Eigen/src/Splines/SplineFwd.h | 93 + external/unsupported/README.txt | 50 + include/common.hpp | 1 + include/matrix_adaptation.hpp | 
53 +- include/modules.hpp | 4 +- include/to_string.hpp | 4 + src/main.cpp | 14 +- src/matrix_adaptation.cpp | 125 +- src/mutation.cpp | 22 +- src/weights.cpp | 81 +- 208 files changed, 68985 insertions(+), 105 deletions(-) create mode 100644 external/unsupported/CMakeLists.txt create mode 100644 external/unsupported/Eigen/AdolcForward create mode 100644 external/unsupported/Eigen/AlignedVector3 create mode 100644 external/unsupported/Eigen/ArpackSupport create mode 100644 external/unsupported/Eigen/AutoDiff create mode 100644 external/unsupported/Eigen/BVH create mode 100644 external/unsupported/Eigen/CMakeLists.txt create mode 100644 external/unsupported/Eigen/CXX11/CMakeLists.txt create mode 100644 external/unsupported/Eigen/CXX11/Tensor create mode 100644 external/unsupported/Eigen/CXX11/TensorSymmetry create mode 100644 external/unsupported/Eigen/CXX11/ThreadPool create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/README.md create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/Tensor.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h create mode 100644 
external/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h create mode 100644 external/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h create mode 100644 external/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h create mode 100644 external/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h create mode 100644 external/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h create mode 100644 external/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h 
create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h create mode 100644 external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h create mode 100644 external/unsupported/Eigen/CXX11/src/util/CXX11Meta.h create mode 100644 external/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h create mode 100644 external/unsupported/Eigen/CXX11/src/util/EmulateArray.h create mode 100644 external/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h create mode 100644 external/unsupported/Eigen/EulerAngles create mode 100644 external/unsupported/Eigen/FFT create mode 100644 external/unsupported/Eigen/IterativeSolvers create mode 100644 external/unsupported/Eigen/KroneckerProduct create mode 100644 external/unsupported/Eigen/LevenbergMarquardt create mode 100644 external/unsupported/Eigen/MPRealSupport create mode 100644 external/unsupported/Eigen/MatrixFunctions create mode 100644 external/unsupported/Eigen/MoreVectorization create mode 100644 external/unsupported/Eigen/NonLinearOptimization create mode 100644 external/unsupported/Eigen/NumericalDiff create mode 100644 external/unsupported/Eigen/OpenGLSupport create mode 100644 external/unsupported/Eigen/Polynomials create mode 100644 external/unsupported/Eigen/Skyline create mode 100644 external/unsupported/Eigen/SparseExtra create mode 100644 external/unsupported/Eigen/SpecialFunctions create mode 100644 external/unsupported/Eigen/Splines create mode 100644 external/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h create mode 100644 external/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h create mode 100644 external/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h create mode 100644 external/unsupported/Eigen/src/BVH/BVAlgorithms.h create mode 100644 external/unsupported/Eigen/src/BVH/KdBVH.h create mode 100644 external/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h create mode 100644 external/unsupported/Eigen/src/EulerAngles/CMakeLists.txt create mode 100644 external/unsupported/Eigen/src/EulerAngles/EulerAngles.h create mode 100644 external/unsupported/Eigen/src/EulerAngles/EulerSystem.h create mode 100644 external/unsupported/Eigen/src/FFT/ei_fftw_impl.h create mode 100644 external/unsupported/Eigen/src/FFT/ei_kissfft_impl.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/DGMRES.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/GMRES.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/IDRS.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/IterationController.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/MINRES.h create mode 100644 external/unsupported/Eigen/src/IterativeSolvers/Scaling.h create mode 100644 external/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h create mode 100644 
external/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt create mode 100644 external/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h create mode 100644 external/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h create mode 100644 external/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h create mode 100644 external/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h create mode 100644 external/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h create mode 100644 external/unsupported/Eigen/src/MatrixFunctions/StemFunction.h create mode 100644 external/unsupported/Eigen/src/MoreVectorization/MathFunctions.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/chkder.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/covar.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/dogleg.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/lmpar.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/r1updt.h create mode 100644 external/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h create mode 100644 external/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h create mode 100644 external/unsupported/Eigen/src/Polynomials/Companion.h create mode 100644 external/unsupported/Eigen/src/Polynomials/PolynomialSolver.h create mode 100644 external/unsupported/Eigen/src/Polynomials/PolynomialUtils.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineMatrix.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineProduct.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineStorage.h create mode 100644 external/unsupported/Eigen/src/Skyline/SkylineUtil.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/MarketIO.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h create mode 100644 external/unsupported/Eigen/src/SparseExtra/RandomSetter.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h create mode 100644 
external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h create mode 100644 external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h create mode 100644 external/unsupported/Eigen/src/Splines/Spline.h create mode 100644 external/unsupported/Eigen/src/Splines/SplineFitting.h create mode 100644 external/unsupported/Eigen/src/Splines/SplineFwd.h create mode 100644 external/unsupported/README.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 511794c..8aaf37b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ add_executable(main ${SRC_FILES}) target_include_directories(main PUBLIC ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/external + ${PROJECT_SOURCE_DIR}/external ) if (MSVC) diff --git a/external/unsupported/CMakeLists.txt b/external/unsupported/CMakeLists.txt new file mode 100644 index 0000000..34408c0 --- /dev/null +++ b/external/unsupported/CMakeLists.txt @@ -0,0 +1,11 @@ +add_subdirectory(Eigen) +if(EIGEN_BUILD_DOC) + add_subdirectory(doc EXCLUDE_FROM_ALL) +endif() +if(BUILD_TESTING) + if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest + else() + add_subdirectory(test EXCLUDE_FROM_ALL) + endif() +endif() diff --git a/external/unsupported/Eigen/AdolcForward b/external/unsupported/Eigen/AdolcForward new file mode 100644 index 0000000..56caeae --- /dev/null +++ b/external/unsupported/Eigen/AdolcForward @@ -0,0 +1,159 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ADLOC_FORWARD +#define EIGEN_ADLOC_FORWARD + +//-------------------------------------------------------------------------------- +// +// This file provides support for adolc's adouble type in forward mode. 
+// ADOL-C is a C++ automatic differentiation library, +// see https://projects.coin-or.org/ADOL-C for more information. +// +// Note that the maximal number of directions is controlled by +// the preprocessor token NUMBER_DIRECTIONS. The default is 2. +// +//-------------------------------------------------------------------------------- + +#define ADOLC_TAPELESS +#ifndef NUMBER_DIRECTIONS +# define NUMBER_DIRECTIONS 2 +#endif +#include + +// adolc defines some very stupid macros: +#if defined(malloc) +# undef malloc +#endif + +#if defined(calloc) +# undef calloc +#endif + +#if defined(realloc) +# undef realloc +#endif + +#include "../../Eigen/Core" + +namespace Eigen { + +/** + * \defgroup AdolcForward_Module Adolc forward module + * This module provides support for adolc's adouble type in forward mode. + * ADOL-C is a C++ automatic differentiation library, + * see https://projects.coin-or.org/ADOL-C for more information. + * It mainly consists in: + * - a struct Eigen::NumTraits specialization + * - overloads of internal::* math function for adtl::adouble type. + * + * Note that the maximal number of directions is controlled by + * the preprocessor token NUMBER_DIRECTIONS. The default is 2. + * + * \code + * #include + * \endcode + */ + //@{ + +} // namespace Eigen + +// Eigen's require a few additional functions which must be defined in the same namespace +// than the custom scalar type own namespace +namespace adtl { + +inline const adouble& conj(const adouble& x) { return x; } +inline const adouble& real(const adouble& x) { return x; } +inline adouble imag(const adouble&) { return 0.; } +inline adouble abs(const adouble& x) { return fabs(x); } +inline adouble abs2(const adouble& x) { return x*x; } + +inline bool (isinf)(const adouble& x) { return (Eigen::numext::isinf)(x.getValue()); } +inline bool (isnan)(const adouble& x) { return (Eigen::numext::isnan)(x.getValue()); } + +} + +namespace Eigen { + +template<> struct NumTraits + : NumTraits +{ + typedef adtl::adouble Real; + typedef adtl::adouble NonInteger; + typedef adtl::adouble Nested; + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; +}; + +template class AdolcForwardJacobian : public Functor +{ + typedef adtl::adouble ActiveScalar; +public: + + AdolcForwardJacobian() : Functor() {} + AdolcForwardJacobian(const Functor& f) : Functor(f) {} + + // forward constructors + template + AdolcForwardJacobian(const T0& a0) : Functor(a0) {} + template + AdolcForwardJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} + template + AdolcForwardJacobian(const T0& a0, const T1& a1, const T1& a2) : Functor(a0, a1, a2) {} + + typedef typename Functor::InputType InputType; + typedef typename Functor::ValueType ValueType; + typedef typename Functor::JacobianType JacobianType; + + typedef Matrix ActiveInput; + typedef Matrix ActiveValue; + + void operator() (const InputType& x, ValueType* v, JacobianType* _jac) const + { + eigen_assert(v!=0); + if (!_jac) + { + Functor::operator()(x, v); + return; + } + + JacobianType& jac = *_jac; + + ActiveInput ax = x.template cast(); + ActiveValue av(jac.rows()); + + for (int j=0; j +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ALIGNED_VECTOR3 +#define EIGEN_ALIGNED_VECTOR3 + +#include "../../Eigen/Geometry" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +namespace Eigen { + +/** + * \defgroup AlignedVector3_Module Aligned vector3 module + * + * \code + * #include + * \endcode + */ + //@{ + + +/** \class AlignedVector3 + * + * \brief A vectorization friendly 3D vector + * + * This class represents a 3D vector internally using a 4D vector + * such that vectorization can be seamlessly enabled. Of course, + * the same result can be achieved by directly using a 4D vector. + * This class makes this process simpler. + * + */ +// TODO specialize Cwise +template class AlignedVector3; + +namespace internal { +template struct traits > + : traits > +{ +}; +} + +template class AlignedVector3 + : public MatrixBase > +{ + typedef Matrix<_Scalar,4,1> CoeffType; + CoeffType m_coeffs; + public: + + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3) + using Base::operator*; + + inline Index rows() const { return 3; } + inline Index cols() const { return 1; } + + Scalar* data() { return m_coeffs.data(); } + const Scalar* data() const { return m_coeffs.data(); } + Index innerStride() const { return 1; } + Index outerStride() const { return 3; } + + inline const Scalar& coeff(Index row, Index col) const + { return m_coeffs.coeff(row, col); } + + inline Scalar& coeffRef(Index row, Index col) + { return m_coeffs.coeffRef(row, col); } + + inline const Scalar& coeff(Index index) const + { return m_coeffs.coeff(index); } + + inline Scalar& coeffRef(Index index) + { return m_coeffs.coeffRef(index);} + + + inline AlignedVector3() + {} + + inline AlignedVector3(const Scalar& x, const Scalar& y, const Scalar& z) + : m_coeffs(x, y, z, Scalar(0)) + {} + + inline AlignedVector3(const AlignedVector3& other) + : Base(), m_coeffs(other.m_coeffs) + {} + + template + struct generic_assign_selector {}; + + template struct generic_assign_selector + { + inline static void run(AlignedVector3& dest, const XprType& src) + { + dest.m_coeffs = src; + } + }; + + template struct generic_assign_selector + { + inline static void run(AlignedVector3& dest, const XprType& src) + { + dest.m_coeffs.template head<3>() = src; + dest.m_coeffs.w() = Scalar(0); + } + }; + + template + inline AlignedVector3(const MatrixBase& other) + { + generic_assign_selector::run(*this,other.derived()); + } + + inline AlignedVector3& operator=(const AlignedVector3& other) + { m_coeffs = other.m_coeffs; return *this; } + + template + inline AlignedVector3& operator=(const MatrixBase& other) + { + generic_assign_selector::run(*this,other.derived()); + return *this; + } + + inline AlignedVector3 operator+(const AlignedVector3& other) const + { return AlignedVector3(m_coeffs + other.m_coeffs); } + + inline AlignedVector3& operator+=(const AlignedVector3& other) + { m_coeffs += other.m_coeffs; return *this; } + + inline AlignedVector3 operator-(const AlignedVector3& other) const + { return AlignedVector3(m_coeffs - other.m_coeffs); } + + inline AlignedVector3 operator-() const + { return AlignedVector3(-m_coeffs); } + + inline AlignedVector3 operator-=(const AlignedVector3& other) + { m_coeffs -= other.m_coeffs; return *this; } + + inline AlignedVector3 operator*(const Scalar& s) const + { return AlignedVector3(m_coeffs * s); } + + inline friend AlignedVector3 operator*(const Scalar& s,const AlignedVector3& vec) + { return AlignedVector3(s * vec.m_coeffs); } + + inline AlignedVector3& operator*=(const Scalar& s) + { m_coeffs *= s; 
return *this; } + + inline AlignedVector3 operator/(const Scalar& s) const + { return AlignedVector3(m_coeffs / s); } + + inline AlignedVector3& operator/=(const Scalar& s) + { m_coeffs /= s; return *this; } + + inline Scalar dot(const AlignedVector3& other) const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + eigen_assert(other.m_coeffs.w()==Scalar(0)); + return m_coeffs.dot(other.m_coeffs); + } + + inline void normalize() + { + m_coeffs /= norm(); + } + + inline AlignedVector3 normalized() const + { + return AlignedVector3(m_coeffs / norm()); + } + + inline Scalar sum() const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + return m_coeffs.sum(); + } + + inline Scalar squaredNorm() const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + return m_coeffs.squaredNorm(); + } + + inline Scalar norm() const + { + using std::sqrt; + return sqrt(squaredNorm()); + } + + inline AlignedVector3 cross(const AlignedVector3& other) const + { + return AlignedVector3(m_coeffs.cross3(other.m_coeffs)); + } + + template + inline bool isApprox(const MatrixBase& other, const RealScalar& eps=NumTraits::dummy_precision()) const + { + return m_coeffs.template head<3>().isApprox(other,eps); + } + + CoeffType& coeffs() { return m_coeffs; } + const CoeffType& coeffs() const { return m_coeffs; } +}; + +namespace internal { + +template +struct eval, Dense> +{ + typedef const AlignedVector3<_Scalar>& type; +}; + +template +struct evaluator > + : evaluator > +{ + typedef AlignedVector3 XprType; + typedef evaluator > Base; + + evaluator(const XprType &m) : Base(m.coeffs()) {} +}; + +} + +//@} + +} + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ALIGNED_VECTOR3 diff --git a/external/unsupported/Eigen/ArpackSupport b/external/unsupported/Eigen/ArpackSupport new file mode 100644 index 0000000..67c4ac8 --- /dev/null +++ b/external/unsupported/Eigen/ArpackSupport @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARPACKSUPPORT_MODULE_H +#define EIGEN_ARPACKSUPPORT_MODULE_H + +#include "../../Eigen/Core" + +/** \defgroup ArpackSupport_Module Arpack support module + * + * This module provides a wrapper to Arpack, a library for sparse eigenvalue decomposition. + * + * \code + * #include + * \endcode + */ + +#include "../../Eigen/SparseCholesky" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" +#include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ARPACKSUPPORT_MODULE_H diff --git a/external/unsupported/Eigen/AutoDiff b/external/unsupported/Eigen/AutoDiff new file mode 100644 index 0000000..7a4ff46 --- /dev/null +++ b/external/unsupported/Eigen/AutoDiff @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_AUTODIFF_MODULE +#define EIGEN_AUTODIFF_MODULE + +namespace Eigen { + +/** + * \defgroup AutoDiff_Module Auto Diff module + * + * This module features forward automatic differentation via a simple + * templated scalar type wrapper AutoDiffScalar. + * + * Warning : this should NOT be confused with numerical differentiation, which + * is a different method and has its own module in Eigen : \ref NumericalDiff_Module. + * + * \code + * #include + * \endcode + */ +//@{ + +} +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + + +#include "src/AutoDiff/AutoDiffScalar.h" +// #include "src/AutoDiff/AutoDiffVector.h" +#include "src/AutoDiff/AutoDiffJacobian.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + + + +namespace Eigen { +//@} +} + +#endif // EIGEN_AUTODIFF_MODULE diff --git a/external/unsupported/Eigen/BVH b/external/unsupported/Eigen/BVH new file mode 100644 index 0000000..666c983 --- /dev/null +++ b/external/unsupported/Eigen/BVH @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Ilya Baran +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BVH_MODULE_H +#define EIGEN_BVH_MODULE_H + +#include "../../Eigen/Core" +#include "../../Eigen/Geometry" +#include "../../Eigen/StdVector" +#include +#include + +namespace Eigen { + +/** + * \defgroup BVH_Module BVH module + * \brief This module provides generic bounding volume hierarchy algorithms + * and reference tree implementations. + * + * + * \code + * #include + * \endcode + * + * A bounding volume hierarchy (BVH) can accelerate many geometric queries. This module provides a generic implementation + * of the two basic algorithms over a BVH: intersection of a query object against all objects in the hierarchy and minimization + * of a function over the objects in the hierarchy. It also provides intersection and minimization over a cartesian product of + * two BVH's. A BVH accelerates intersection by using the fact that if a query object does not intersect a volume, then it cannot + * intersect any object contained in that volume. Similarly, a BVH accelerates minimization because the minimum of a function + * over a volume is no greater than the minimum of a function over any object contained in it. 
+ * + * Some sample queries that can be written in terms of intersection are: + * - Determine all points where a ray intersects a triangle mesh + * - Given a set of points, determine which are contained in a query sphere + * - Given a set of spheres, determine which contain the query point + * - Given a set of disks, determine if any is completely contained in a query rectangle (represent each 2D disk as a point \f$(x,y,r)\f$ + * in 3D and represent the rectangle as a pyramid based on the original rectangle and shrinking in the \f$r\f$ direction) + * - Given a set of points, count how many pairs are \f$d\pm\epsilon\f$ apart (done by looking at the cartesian product of the set + * of points with itself) + * + * Some sample queries that can be written in terms of function minimization over a set of objects are: + * - Find the intersection between a ray and a triangle mesh closest to the ray origin (function is infinite off the ray) + * - Given a polyline and a query point, determine the closest point on the polyline to the query + * - Find the diameter of a point cloud (done by looking at the cartesian product and using negative distance as the function) + * - Determine how far two meshes are from colliding (this is also a cartesian product query) + * + * This implementation decouples the basic algorithms both from the type of hierarchy (and the types of the bounding volumes) and + * from the particulars of the query. To enable abstraction from the BVH, the BVH is required to implement a generic mechanism + * for traversal. To abstract from the query, the query is responsible for keeping track of results. + * + * To be used in the algorithms, a hierarchy must implement the following traversal mechanism (see KdBVH for a sample implementation): \code + typedef Volume //the type of bounding volume + typedef Object //the type of object in the hierarchy + typedef Index //a reference to a node in the hierarchy--typically an int or a pointer + typedef VolumeIterator //an iterator type over node children--returns Index + typedef ObjectIterator //an iterator over object (leaf) children--returns const Object & + Index getRootIndex() const //returns the index of the hierarchy root + const Volume &getVolume(Index index) const //returns the bounding volume of the node at given index + void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, + ObjectIterator &outOBegin, ObjectIterator &outOEnd) const + //getChildren takes a node index and makes [outVBegin, outVEnd) range over its node children + //and [outOBegin, outOEnd) range over its object children + \endcode + * + * To use the hierarchy, call BVIntersect or BVMinimize, passing it a BVH (or two, for cartesian product) and a minimizer or intersector. + * For an intersection query on a single BVH, the intersector encapsulates the query and must provide two functions: + * \code + bool intersectVolume(const Volume &volume) //returns true if the query intersects the volume + bool intersectObject(const Object &object) //returns true if the intersection search should terminate immediately + \endcode + * The guarantee that BVIntersect provides is that intersectObject will be called on every object whose bounding volume + * intersects the query (but possibly on other objects too) unless the search is terminated prematurely. It is the + * responsibility of the intersectObject function to keep track of the results in whatever manner is appropriate. 
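(Editor's note on the intersector contract described above: the sketch below is an illustration and is not part of the vendored Eigen header. The struct name `SphereIntersector` is an assumption, and the `bounding_box` helper follows the pattern used in Eigen's own BVH_Example.cpp, which the documentation above references. It collects every point of a `KdBVH` lying inside a query sphere.)

    #include <vector>
    #include <Eigen/Dense>
    #include <unsupported/Eigen/BVH>

    // KdBVH locates the bounding volume of a leaf object through this helper,
    // following the pattern used in Eigen's BVH_Example.cpp.
    namespace Eigen { namespace internal {
    AlignedBox3f bounding_box(const Vector3f& v) { return AlignedBox3f(v, v); }
    } }

    // Collects every point whose distance to `center` is at most `radius`.
    struct SphereIntersector
    {
        SphereIntersector(const Eigen::Vector3f& c, float r) : center(c), radius2(r * r) {}

        // Prune a node: descend only if the sphere touches its bounding box.
        bool intersectVolume(const Eigen::AlignedBox3f& box)
        { return box.squaredExteriorDistance(center) <= radius2; }

        // Record a hit; returning false keeps the search running.
        bool intersectObject(const Eigen::Vector3f& p)
        {
            if ((p - center).squaredNorm() <= radius2) hits.push_back(p);
            return false;
        }

        Eigen::Vector3f center;
        float radius2;
        std::vector<Eigen::Vector3f> hits;
    };

    // Usage sketch:
    //   std::vector<Eigen::Vector3f> pts = ...;
    //   Eigen::KdBVH<float, 3, Eigen::Vector3f> tree(pts.begin(), pts.end());
    //   SphereIntersector query(Eigen::Vector3f::Zero(), 1.0f);
    //   Eigen::BVIntersect(tree, query);   // query.hits now holds the points inside the unit sphere.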
+ * The cartesian product intersection and the BVMinimize queries are similar--see their individual documentation. + * + * The following is a simple but complete example for how to use the BVH to accelerate the search for a closest red-blue point pair: + * \include BVH_Example.cpp + * Output: \verbinclude BVH_Example.out + */ +} + +//@{ + +#include "src/BVH/BVAlgorithms.h" +#include "src/BVH/KdBVH.h" + +//@} + +#endif // EIGEN_BVH_MODULE_H diff --git a/external/unsupported/Eigen/CMakeLists.txt b/external/unsupported/Eigen/CMakeLists.txt new file mode 100644 index 0000000..631a060 --- /dev/null +++ b/external/unsupported/Eigen/CMakeLists.txt @@ -0,0 +1,32 @@ +set(Eigen_HEADERS + AdolcForward + AlignedVector3 + ArpackSupport + AutoDiff + BVH + EulerAngles + FFT + IterativeSolvers + KroneckerProduct + LevenbergMarquardt + MatrixFunctions + MoreVectorization + MPRealSupport + NonLinearOptimization + NumericalDiff + OpenGLSupport + Polynomials + Skyline + SparseExtra + SpecialFunctions + Splines + ) + +install(FILES + ${Eigen_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") + +add_subdirectory(CXX11) diff --git a/external/unsupported/Eigen/CXX11/CMakeLists.txt b/external/unsupported/Eigen/CXX11/CMakeLists.txt new file mode 100644 index 0000000..385ed24 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/CMakeLists.txt @@ -0,0 +1,8 @@ +set(Eigen_CXX11_HEADERS Tensor TensorSymmetry ThreadPool) + +install(FILES + ${Eigen_CXX11_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/external/unsupported/Eigen/CXX11/Tensor b/external/unsupported/Eigen/CXX11/Tensor new file mode 100644 index 0000000..0938bb5 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/Tensor @@ -0,0 +1,137 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//#ifndef EIGEN_CXX11_TENSOR_MODULE +//#define EIGEN_CXX11_TENSOR_MODULE + +#include "../../../Eigen/Core" + +#if EIGEN_HAS_CXX11 + +#include "../SpecialFunctions" + +#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h" +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +/** \defgroup CXX11_Tensor_Module Tensor Module + * + * This module provides a Tensor class for storing arbitrarily indexed + * objects. + * + * \code + * #include + * \endcode + * + * Much of the documentation can be found \ref eigen_tensors "here". 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(EIGEN_USE_THREADS) || defined(EIGEN_USE_SYCL) +#include "ThreadPool" +#endif + +#ifdef EIGEN_USE_GPU + #include + #if defined(EIGEN_USE_HIP) + #include + #else + #include + #endif +#endif + +#include "src/Tensor/TensorMacros.h" +#include "src/Tensor/TensorForwardDeclarations.h" +#include "src/Tensor/TensorMeta.h" +#include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorCostModel.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceGpu.h" +#ifndef gpu_assert +#define gpu_assert(x) +#endif +#include "src/Tensor/TensorDeviceSycl.h" +#include "src/Tensor/TensorIndexList.h" +#include "src/Tensor/TensorDimensionList.h" +#include "src/Tensor/TensorDimensions.h" +#include "src/Tensor/TensorInitializer.h" +#include "src/Tensor/TensorTraits.h" +#include "src/Tensor/TensorRandom.h" +#include "src/Tensor/TensorUInt128.h" +#include "src/Tensor/TensorIntDiv.h" +#include "src/Tensor/TensorGlobalFunctions.h" + +#include "src/Tensor/TensorBase.h" +#include "src/Tensor/TensorBlock.h" + +#include "src/Tensor/TensorEvaluator.h" +#include "src/Tensor/TensorExpr.h" +#include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorReductionGpu.h" +#include "src/Tensor/TensorArgMax.h" +#include "src/Tensor/TensorConcatenation.h" +#include "src/Tensor/TensorContractionMapper.h" +#include "src/Tensor/TensorContractionBlocking.h" +#include "src/Tensor/TensorContraction.h" +#include "src/Tensor/TensorContractionThreadPool.h" +#include "src/Tensor/TensorContractionGpu.h" +#include "src/Tensor/TensorConversion.h" +#include "src/Tensor/TensorConvolution.h" +#include "src/Tensor/TensorFFT.h" +#include "src/Tensor/TensorPatch.h" +#include "src/Tensor/TensorImagePatch.h" +#include "src/Tensor/TensorVolumePatch.h" +#include "src/Tensor/TensorBroadcasting.h" +#include "src/Tensor/TensorChipping.h" +#include "src/Tensor/TensorInflation.h" +#include "src/Tensor/TensorLayoutSwap.h" +#include "src/Tensor/TensorMorphing.h" +#include "src/Tensor/TensorPadding.h" +#include "src/Tensor/TensorReverse.h" +#include "src/Tensor/TensorShuffling.h" +#include "src/Tensor/TensorStriding.h" +#include "src/Tensor/TensorCustomOp.h" +#include "src/Tensor/TensorEvalTo.h" +#include "src/Tensor/TensorForcedEval.h" +#include "src/Tensor/TensorGenerator.h" +#include "src/Tensor/TensorAssign.h" +#include "src/Tensor/TensorScan.h" +#include "src/Tensor/TensorTrace.h" + +#ifdef EIGEN_USE_SYCL +#include "src/Tensor/TensorReductionSycl.h" +#include "src/Tensor/TensorConvolutionSycl.h" +#include "src/Tensor/TensorContractionSycl.h" +#include "src/Tensor/TensorScanSycl.h" +#endif + +#include "src/Tensor/TensorExecutor.h" +#include "src/Tensor/TensorDevice.h" + +#include "src/Tensor/TensorStorage.h" +#include "src/Tensor/Tensor.h" +#include "src/Tensor/TensorFixedSize.h" +#include "src/Tensor/TensorMap.h" +#include "src/Tensor/TensorRef.h" + +#include "src/Tensor/TensorIO.h" + +#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_HAS_CXX11 +//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/external/unsupported/Eigen/CXX11/TensorSymmetry b/external/unsupported/Eigen/CXX11/TensorSymmetry new file mode 100644 index 0000000..b09c5e4 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/TensorSymmetry @@ -0,0 +1,42 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE +#define EIGEN_CXX11_TENSORSYMMETRY_MODULE + +#include "Tensor" + +#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "src/util/CXX11Meta.h" + +/** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module + * + * This module provides a classes that allow for the definition of + * symmetries w.r.t. tensor indices. + * + * Including this module will implicitly include the Tensor module. + * + * \code + * #include + * \endcode + */ + +#include "src/TensorSymmetry/util/TemplateGroupTheory.h" +#include "src/TensorSymmetry/Symmetry.h" +#include "src/TensorSymmetry/StaticSymmetry.h" +#include "src/TensorSymmetry/DynamicSymmetry.h" + +#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/ThreadPool b/external/unsupported/Eigen/CXX11/ThreadPool new file mode 100644 index 0000000..c5cafb2 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/ThreadPool @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_MODULE +#define EIGEN_CXX11_THREADPOOL_MODULE + +#include "../../../Eigen/Core" + +#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h" + +/** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module + * + * This module provides 2 threadpool implementations + * - a simple reference implementation + * - a faster non blocking implementation + * + * This module requires C++11. + * + * \code + * #include + * \endcode + */ + + +// The code depends on CXX11, so only include the module if the +// compiler supports it. +#if (EIGEN_COMP_CXXVER >= 11) +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// There are non-parenthesized calls to "max" in the header, +// which trigger a check in test/main.h causing compilation to fail. +// We work around the check here by removing the check for max in +// the case where we have to emulate thread_local. 
+#ifdef max +#undef max +#endif +#include + +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +#include "src/ThreadPool/ThreadLocal.h" +#include "src/ThreadPool/ThreadYield.h" +#include "src/ThreadPool/ThreadCancel.h" +#include "src/ThreadPool/EventCount.h" +#include "src/ThreadPool/RunQueue.h" +#include "src/ThreadPool/ThreadPoolInterface.h" +#include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/Barrier.h" +#include "src/ThreadPool/NonBlockingThreadPool.h" + +#endif + +#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CXX11_THREADPOOL_MODULE diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/README.md b/external/unsupported/Eigen/CXX11/src/Tensor/README.md new file mode 100644 index 0000000..2f65b1b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -0,0 +1,1815 @@ +# Eigen Tensors {#eigen_tensors} + +Tensors are multidimensional arrays of elements. Elements are typically scalars, +but more complex types such as strings are also supported. + +## Tensor Classes + +You can manipulate a tensor with one of the following classes. They all are in +the namespace `::Eigen.` + + +### Class Tensor + +This is the class to use to create a tensor and allocate memory for it. The +class is templatized with the tensor datatype, such as float or int, and the +tensor rank. The rank is the number of dimensions, for example rank 2 is a +matrix. + +Tensors of this class are resizable. For example, if you assign a tensor of a +different size to a Tensor, that tensor is resized to match its new value. + +#### Constructor Tensor(size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed `rank` integers +indicating the sizes of the instance along each of the the `rank` +dimensions. + + // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns + // memory to hold 24 floating point values (24 = 2 x 3 x 4). + Tensor t_3d(2, 3, 4); + + // Resize t_3d by assigning a tensor of different sizes, but same rank. + t_3d = Tensor(3, 4, 3); + +#### Constructor Tensor(size_array) + +Constructor where the sizes for the constructor are specified as an array of +values instead of an explicitly list of parameters. The array type to use is +`Eigen::array`. The array can be constructed automatically +from an initializer list. + + // Create a tensor of strings of rank 2 with sizes 5, 7. + Tensor t_2d({5, 7}); + + +### Class TensorFixedSize> + +Class to use for tensors of fixed size, where the size is known at compile +time. Fixed sized tensors can provide very fast computations because all their +dimensions are known by the compiler. FixedSize tensors are not resizable. + +If the total number of elements in a fixed size tensor is small enough the +tensor data is held onto the stack and does not cause heap allocation and free. + + // Create a 4 x 3 tensor of floats. + TensorFixedSize> t_4x3; + +### Class TensorMap> + +This is the class to use to create a tensor on top of memory allocated and +owned by another part of your code. It allows to view any piece of allocated +memory as a Tensor. Instances of this class do not own the memory where the +data are stored. + +A TensorMap is not resizable because it does not own the memory where its data +are stored. + +#### Constructor TensorMap>(data, size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed a pointer to the +storage for the data, and "rank" size attributes. The storage has to be +large enough to hold all the data. 
+ + // Map a tensor of ints on top of stack-allocated storage. + int storage[128]; // 2 x 4 x 2 x 8 = 128 + TensorMap> t_4d(storage, 2, 4, 2, 8); + + // The same storage can be viewed as a different tensor. + // You can also pass the sizes as an array. + TensorMap> t_2d(storage, 16, 8); + + // You can also map fixed-size tensors. Here we get a 1d view of + // the 2d fixed-size tensor. + TensorFixedSize> t_4x3; + TensorMap> t_12(t_4x3.data(), 12); + + +#### Class TensorRef + +See Assigning to a TensorRef below. + +## Accessing Tensor Elements + +#### tensor(index0, index1...) + +Return the element at position `(index0, index1...)` in tensor +`tensor`. You must pass as many parameters as the rank of `tensor`. +The expression can be used as an l-value to set the value of the element at the +specified position. The value returned is of the datatype of the tensor. + + // Set the value of the element at position (0, 1, 0); + Tensor t_3d(2, 3, 4); + t_3d(0, 1, 0) = 12.0f; + + // Initialize all elements to random values. + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 4; ++k) { + t_3d(i, j, k) = ...some random value...; + } + } + } + + // Print elements of a tensor. + for (int i = 0; i < 2; ++i) { + LOG(INFO) << t_3d(i, 0, 0); + } + + +## TensorLayout + +The tensor library supports 2 layouts: `ColMajor` (the default) and +`RowMajor`. Only the default column major layout is currently fully +supported, and it is therefore not recommended to attempt to use the row major +layout at the moment. + +The layout of a tensor is optionally specified as part of its type. If not +specified explicitly column major is assumed. + + Tensor col_major; // equivalent to Tensor + TensorMap > row_major(data, ...); + +All the arguments to an expression must use the same layout. Attempting to mix +different layouts will result in a compilation error. + +It is possible to change the layout of a tensor or an expression using the +`swap_layout()` method. Note that this will also reverse the order of the +dimensions. + + Tensor col_major(2, 4); + Tensor row_major(2, 4); + + Tensor col_major_result = col_major; // ok, layouts match + Tensor col_major_result = row_major; // will not compile + + // Simple layout swap + col_major_result = row_major.swap_layout(); + eigen_assert(col_major_result.dimension(0) == 4); + eigen_assert(col_major_result.dimension(1) == 2); + + // Swap the layout and preserve the order of the dimensions + array shuffle(1, 0); + col_major_result = row_major.swap_layout().shuffle(shuffle); + eigen_assert(col_major_result.dimension(0) == 2); + eigen_assert(col_major_result.dimension(1) == 4); + + +## Tensor Operations + +The Eigen Tensor library provides a vast library of operations on Tensors: +numerical operations such as addition and multiplication, geometry operations +such as slicing and shuffling, etc. These operations are available as methods +of the Tensor classes, and in some cases as operator overloads. For example +the following code computes the elementwise addition of two tensors: + + Tensor t1(2, 3, 4); + ...set some values in t1... + Tensor t2(2, 3, 4); + ...set some values in t2... + // Set t3 to the element wise sum of t1 and t2 + Tensor t3 = t1 + t2; + +While the code above looks easy enough, it is important to understand that the +expression `t1 + t2` is not actually adding the values of the tensors. The +expression instead constructs a "tensor operator" object of the class +TensorCwiseBinaryOp, which has references to the tensors +`t1` and `t2`. 
This is a small C++ object that knows how to add +`t1` and `t2`. It is only when the value of the expression is assigned +to the tensor `t3` that the addition is actually performed. Technically, +this happens through the overloading of `operator=()` in the Tensor class. + +This mechanism for computing tensor expressions allows for lazy evaluation and +optimizations which are what make the tensor library very fast. + +Of course, the tensor operators do nest, and the expression `t1 + t2 * 0.3f` +is actually represented with the (approximate) tree of operators: + + TensorCwiseBinaryOp(t1, TensorCwiseUnaryOp(t2, 0.3f)) + + +### Tensor Operations and C++ "auto" + +Because Tensor operations create tensor operators, the C++ `auto` keyword +does not have its intuitive meaning. Consider these 2 lines of code: + + Tensor t3 = t1 + t2; + auto t4 = t1 + t2; + +In the first line we allocate the tensor `t3` and it will contain the +result of the addition of `t1` and `t2`. In the second line, `t4` +is actually the tree of tensor operators that will compute the addition of +`t1` and `t2`. In fact, `t4` is *not* a tensor and you cannot get +the values of its elements: + + Tensor t3 = t1 + t2; + cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0) + + auto t4 = t1 + t2; + cout << t4(0, 0, 0); // Compilation error! + +When you use `auto` you do not get a Tensor as a result but instead a +non-evaluated expression. So only use `auto` to delay evaluation. + +Unfortunately, there is no single underlying concrete type for holding +non-evaluated expressions, hence you have to use auto in the case when you do +want to hold non-evaluated expressions. + +When you need the results of set of tensor computations you have to assign the +result to a Tensor that will be capable of holding onto them. This can be +either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing +piece of memory. All the following will work: + + auto t4 = t1 + t2; + + Tensor result = t4; // Could also be: result(t4); + cout << result(0, 0, 0); + + TensorMap result(, , ...) = t4; + cout << result(0, 0, 0); + + TensorFixedSize> result = t4; + cout << result(0, 0, 0); + +Until you need the results, you can keep the operation around, and even reuse +it for additional operations. As long as you keep the expression as an +operation, no computation is performed. + + // One way to compute exp((t1 + t2) * 0.2f); + auto t3 = t1 + t2; + auto t4 = t3 * 0.2f; + auto t5 = t4.exp(); + Tensor result = t5; + + // Another way, exactly as efficient as the previous one: + Tensor result = ((t1 + t2) * 0.2f).exp(); + +### Controlling When Expression are Evaluated + +There are several ways to control when expressions are evaluated: + +* Assignment to a Tensor, TensorFixedSize, or TensorMap. +* Use of the eval() method. +* Assignment to a TensorRef. + +#### Assigning to a Tensor, TensorFixedSize, or TensorMap. + +The most common way to evaluate an expression is to assign it to a Tensor. In +the example below, the `auto` declarations make the intermediate values +"Operations", not Tensors, and do not cause the expressions to be evaluated. +The assignment to the Tensor `result` causes the evaluation of all the +operations. + + auto t3 = t1 + t2; // t3 is an Operation. + auto t4 = t3 * 0.2f; // t4 is an Operation. + auto t5 = t4.exp(); // t5 is an Operation. + Tensor result = t5; // The operations are evaluated. 
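(Editor's note: the README snippets above are deliberately fragmentary. The following self-contained sketch, which is not part of the vendored README, shows the same lazy-evaluation behaviour end to end; it assumes the `external/` directory added by this patch is on the include path, so the module is reachable as `<unsupported/Eigen/CXX11/Tensor>`.)

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    int main()
    {
        Eigen::Tensor<float, 3> t1(2, 3, 4);
        Eigen::Tensor<float, 3> t2(2, 3, 4);
        t1.setRandom();
        t2.setRandom();

        auto t3 = t1 + t2;    // an Operation (expression tree), nothing computed yet
        auto t4 = t3 * 0.2f;  // still an Operation

        // Assigning to a Tensor materializes the result: the addition and the
        // scaling are performed here, in a single evaluation pass.
        Eigen::Tensor<float, 3> result = t4;

        std::cout << result(0, 1, 0) << "\n";
        return 0;
    }

Swapping the final assignment for `auto result = t4;` would leave `result` as an unevaluated expression, which is exactly the `auto` pitfall discussed earlier in this README.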
+ +If you know the ranks and sizes of the Operation value you can assign the +Operation to a TensorFixedSize instead of a Tensor, which is a bit more +efficient. + + // We know that the result is a 4x4x2 tensor! + TensorFixedSize> result = t5; + +Simiarly, assigning an expression to a TensorMap causes its evaluation. Like +tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to +have the rank and sizes of the expression that are assigned to them. + +#### Calling eval(). + +When you compute large composite expressions, you sometimes want to tell Eigen +that an intermediate value in the expression tree is worth evaluating ahead of +time. This is done by inserting a call to the `eval()` method of the +expression Operation. + + // The previous example could have been written: + Tensor result = ((t1 + t2) * 0.2f).exp(); + + // If you want to compute (t1 + t2) once ahead of time you can write: + Tensor result = ((t1 + t2).eval() * 0.2f).exp(); + +Semantically, calling `eval()` is equivalent to materializing the value of +the expression in a temporary Tensor of the right size. The code above in +effect does: + + // .eval() knows the size! + TensorFixedSize> tmp = t1 + t2; + Tensor result = (tmp * 0.2f).exp(); + +Note that the return value of `eval()` is itself an Operation, so the +following code does not do what you may think: + + // Here t3 is an evaluation Operation. t3 has not been evaluated yet. + auto t3 = (t1 + t2).eval(); + + // You can use t3 in another expression. Still no evaluation. + auto t4 = (t3 * 0.2f).exp(); + + // The value is evaluated when you assign the Operation to a Tensor, using + // an intermediate tensor to represent t3.x + Tensor result = t4; + +While in the examples above calling `eval()` does not make a difference in +performance, in other cases it can make a huge difference. In the expression +below the `broadcast()` expression causes the `X.maximum()` expression +to be evaluated many times: + + Tensor<...> X ...; + Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +Inserting a call to `eval()` between the `maximum()` and +`reshape()` calls guarantees that maximum() is only computed once and +greatly speeds-up execution: + + Tensor<...> Y = + ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +In the other example below, the tensor `Y` is both used in the expression +and its assignment. This is an aliasing problem and if the evaluation is not +done in the right order Y will be updated incrementally during the evaluation +resulting in bogus results: + + Tensor<...> Y ...; + Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast)); + +Inserting a call to `eval()` between the `sum()` and `reshape()` +expressions ensures that the sum is computed before any updates to `Y` are +done. + + Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + +Note that an eval around the full right hand side expression is not needed +because the generated has to compute the i-th value of the right hand side +before assigning it to the left hand side. + +However, if you were assigning the expression value to a shuffle of `Y` +then you would need to force an eval for correctness by adding an `eval()` +call for the right hand side: + + Y.shuffle(...) = + (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval(); + + +#### Assigning to a TensorRef. 
+ +If you need to access only a few elements from the value of an expression you +can avoid materializing the value in a full tensor by using a TensorRef. + +A TensorRef is a small wrapper class for any Eigen Operation. It provides +overloads for the `()` operator that let you access individual values in +the expression. TensorRef is convenient, because the Operation themselves do +not provide a way to access individual elements. + + // Create a TensorRef for the expression. The expression is not + // evaluated yet. + TensorRef > ref = ((t1 + t2) * 0.2f).exp(); + + // Use "ref" to access individual elements. The expression is evaluated + // on the fly. + float at_0 = ref(0, 0, 0); + cout << ref(0, 1, 0); + +Only use TensorRef when you need a subset of the values of the expression. +TensorRef only computes the values you access. However note that if you are +going to access all the values it will be much faster to materialize the +results in a Tensor first. + +In some cases, if the full Tensor result would be very large, you may save +memory by accessing it as a TensorRef. But not always. So don't count on it. + + +### Controlling How Expressions Are Evaluated + +The tensor library provides several implementations of the various operations +such as contractions and convolutions. The implementations are optimized for +different environments: single threaded on CPU, multi threaded on CPU, or on a +GPU using cuda. Additional implementations may be added later. + +You can choose which implementation to use with the `device()` call. If +you do not choose an implementation explicitly the default implementation that +uses a single thread on the CPU is used. + +The default implementation has been optimized for recent Intel CPUs, taking +advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the +library on ARM CPUs. Note that you need to pass compiler-dependent flags +to enable the use of SSE, AVX, and other instructions. + +For example, the following code adds two tensors using the default +single-threaded CPU implementation: + + Tensor a(30, 40); + Tensor b(30, 40); + Tensor c = a + b; + +To choose a different implementation you have to insert a `device()` call +before the assignment of the result. For technical C++ reasons this requires +that the Tensor for the result be declared on its own. This means that you +have to know the size of the result. + + Eigen::Tensor c(30, 40); + c.device(...) = a + b; + +The call to `device()` must be the last call on the left of the operator=. + +You must pass to the `device()` call an Eigen device object. There are +presently three devices you can use: DefaultDevice, ThreadPoolDevice and +GpuDevice. + + +#### Evaluating With the DefaultDevice + +This is exactly the same as not inserting a `device()` call. + + DefaultDevice my_device; + c.device(my_device) = a + b; + +#### Evaluating with a Thread Pool + + // Create the Eigen ThreadPool + Eigen::ThreadPool pool(8 /* number of threads in pool */) + + // Create the Eigen ThreadPoolDevice. + Eigen::ThreadPoolDevice my_device(&pool, 4 /* number of threads to use */); + + // Now just use the device when evaluating expressions. + Eigen::Tensor c(30, 50); + c.device(my_device) = a.contract(b, dot_product_dims); + + +#### Evaluating On GPU + +This is presently a bit more complicated than just using a thread pool device. +You need to create a GPU device but you also need to explicitly allocate the +memory for tensors with cuda. 
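+
+Below is a minimal sketch of the usual pattern, assuming the CUDA runtime and
+the `GpuStreamDevice`/`GpuDevice` device types shipped with the Tensor module
+headers (the exact device type names have varied between Eigen releases, so
+treat this as an outline rather than a definitive recipe). `a`, `b` and `c`
+are the host tensors from the example above.
+
+    // Allocate device memory with the CUDA runtime and view it as tensors.
+    std::size_t bytes = 30 * 40 * sizeof(float);
+    float *d_a, *d_b, *d_c;
+    cudaMalloc(&d_a, bytes);
+    cudaMalloc(&d_b, bytes);
+    cudaMalloc(&d_c, bytes);
+    cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+    cudaMemcpy(d_b, b.data(), bytes, cudaMemcpyHostToDevice);
+
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> gpu_a(d_a, 30, 40);
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> gpu_b(d_b, 30, 40);
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> gpu_c(d_c, 30, 40);
+
+    // Evaluate the expression on the GPU.
+    Eigen::GpuStreamDevice stream;
+    Eigen::GpuDevice gpu_device(&stream);
+    gpu_c.device(gpu_device) = gpu_a + gpu_b;
+
+    // Copy the result back to the host and release the device memory.
+    cudaMemcpy(c.data(), d_c, bytes, cudaMemcpyDeviceToHost);
+    cudaFree(d_a);
+    cudaFree(d_b);
+    cudaFree(d_c);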
+ + +## API Reference + +### Datatypes + +In the documentation of the tensor methods and Operation we mention datatypes +that are tensor-type specific: + +#### ::Dimensions + +Acts like an array of ints. Has an `int size` attribute, and can be +indexed like an array to access individual values. Used to represent the +dimensions of a tensor. See `dimensions()`. + +#### ::Index + +Acts like an `int`. Used for indexing tensors along their dimensions. See +`operator()`, `dimension()`, and `size()`. + +#### ::Scalar + +Represents the datatype of individual tensor elements. For example, for a +`Tensor`, `Scalar` is the type `float`. See +`setConstant()`. + +#### + +We use this pseudo type to indicate that a tensor Operation is returned by a +method. We indicate in the text the type and dimensions of the tensor that the +Operation returns after evaluation. + +The Operation will have to be evaluated, for example by assigning it to a +tensor, before you can access the values of the resulting tensor. You can also +access the values through a TensorRef. + + +## Built-in Tensor Methods + +These are usual C++ methods that act on tensors immediately. They are not +Operations which provide delayed evaluation of their results. Unless specified +otherwise, all the methods listed below are available on all tensor classes: +Tensor, TensorFixedSize, and TensorMap. + +## Metadata + +### int NumDimensions + +Constant value indicating the number of dimensions of a Tensor. This is also +known as the tensor "rank". + + Eigen::Tensor a(3, 4); + cout << "Dims " << a.NumDimensions; + => Dims 2 + +### Dimensions dimensions() + +Returns an array-like object representing the dimensions of the tensor. +The actual type of the `dimensions()` result is `::``Dimensions`. + + Eigen::Tensor a(3, 4); + const Eigen::Tensor::Dimensions& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +If you use a C++11 compiler, you can use `auto` to simplify the code: + + const auto& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +### Index dimension(Index n) + +Returns the n-th dimension of the tensor. The actual type of the +`dimension()` result is `::``Index`, but you can +always use it like an int. + + Eigen::Tensor a(3, 4); + int dim1 = a.dimension(1); + cout << "Dim 1: " << dim1; + => Dim 1: 4 + +### Index size() + +Returns the total number of elements in the tensor. This is the product of all +the tensor dimensions. The actual type of the `size()` result is +`::``Index`, but you can always use it like an int. + + Eigen::Tensor a(3, 4); + cout << "Size: " << a.size(); + => Size: 12 + + +### Getting Dimensions From An Operation + +A few operations provide `dimensions()` directly, +e.g. `TensorReslicingOp`. Most operations defer calculating dimensions +until the operation is being evaluated. If you need access to the dimensions +of a deferred operation, you can wrap it in a TensorRef (see Assigning to a +TensorRef above), which provides `dimensions()` and `dimension()` as +above. + +TensorRef can also wrap the plain Tensor types, so this is a useful idiom in +templated contexts where the underlying object could be either a raw Tensor +or some deferred operation (e.g. a slice of a Tensor). In this case, the +template code can wrap the object in a TensorRef and reason about its +dimensionality while remaining agnostic to the underlying type. 
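+
+A small sketch of that idiom (the helper name `rows_of` is ours, not part of
+the library):
+
+    // Works both for a plain Tensor and for a deferred operation such as a
+    // slice: wrapping the argument in a TensorRef exposes dimension().
+    template <typename Expr>
+    Eigen::Index rows_of(const Expr& expr) {
+      Eigen::TensorRef<Eigen::Tensor<float, 2>> ref(expr);
+      return ref.dimension(0);
+    }
+
+    Eigen::Tensor<float, 2> a(4, 3);
+    a.setRandom();
+    Eigen::array<Eigen::Index, 2> offsets = {1, 0};
+    Eigen::array<Eigen::Index, 2> extents = {2, 2};
+    rows_of(a);                           // 4: a is a plain Tensor.
+    rows_of(a.slice(offsets, extents));   // 2: a deferred slice Operation.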
+ + +## Constructors + +### Tensor + +Creates a tensor of the specified size. The number of arguments must be equal +to the rank of the tensor. The content of the tensor is not initialized. + + Eigen::Tensor a(3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorFixedSize + +Creates a tensor of the specified size. The number of arguments in the Sizes<> +template parameter determines the rank of the tensor. The content of the tensor +is not initialized. + + Eigen::TensorFixedSize> a; + cout << "Rank: " << a.rank() << endl; + => Rank: 2 + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorMap + +Creates a tensor mapping an existing array of data. The data must not be freed +until the TensorMap is discarded, and the size of the data must be large enough +to accommodate the coefficients of the tensor. + + float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + Eigen::TensorMap> a(data, 3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + cout << "a(1, 2): " << a(1, 2) << endl; + => a(1, 2): 7 + + +## Contents Initialization + +When a new Tensor or a new TensorFixedSize are created, memory is allocated to +hold all the tensor elements, but the memory is not initialized. Similarly, +when a new TensorMap is created on top of non-initialized memory the memory its +contents are not initialized. + +You can use one of the methods below to initialize the tensor memory. These +have an immediate effect on the tensor and return the tensor itself as a +result. These are not tensor Operations which delay evaluation. + +### setConstant(const Scalar& val) + +Sets all elements of the tensor to the constant value `val`. `Scalar` +is the type of data stored in the tensor. You can pass any value that is +convertible to that type. + +Returns the tensor itself in case you want to chain another call. + + a.setConstant(12.3f); + cout << "Constant: " << endl << a << endl << endl; + => + Constant: + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + +Note that `setConstant()` can be used on any tensor where the element type +has a copy constructor and an `operator=()`: + + Eigen::Tensor a(2, 3); + a.setConstant("yolo"); + cout << "String tensor: " << endl << a << endl << endl; + => + String tensor: + yolo yolo yolo + yolo yolo yolo + + +### setZero() + +Fills the tensor with zeros. Equivalent to `setConstant(Scalar(0))`. +Returns the tensor itself in case you want to chain another call. + + a.setZero(); + cout << "Zeros: " << endl << a << endl << endl; + => + Zeros: + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### setValues({..initializer_list}) + +Fills the tensor with explicit values specified in a std::initializer_list. +The type of the initializer list depends on the type and rank of the tensor. + +If the tensor has rank N, the initializer list must be nested N times. The +most deeply nested lists must contains P scalars of the Tensor type where P is +the size of the last dimension of the Tensor. + +For example, for a `TensorFixedSize` the initializer list must +contains 2 lists of 3 floats each. + +`setValues()` returns the tensor itself in case you want to chain another +call. 
+ + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}}); + cout << "a" << endl << a << endl << endl; + => + a + 0 1 2 + 3 4 5 + +If a list is too short, the corresponding elements of the tensor will not be +changed. This is valid at each level of nesting. For example the following +code only sets the values of the first row of the tensor. + + Eigen::Tensor a(2, 3); + a.setConstant(1000); + a.setValues({{10, 20, 30}}); + cout << "a" << endl << a << endl << endl; + => + a + 10 20 30 + 1000 1000 1000 + +### setRandom() + +Fills the tensor with random values. Returns the tensor itself in case you +want to chain another call. + + a.setRandom(); + cout << "Random: " << endl << a << endl << endl; + => + Random: + 0.680375 0.59688 -0.329554 0.10794 + -0.211234 0.823295 0.536459 -0.0452059 + 0.566198 -0.604897 -0.444451 0.257742 + +You can customize `setRandom()` by providing your own random number +generator as a template argument: + + a.setRandom(); + +Here, `MyRandomGenerator` must be a struct with the following member +functions, where Scalar and Index are the same as `::``Scalar` +and `::``Index`. + +See `struct UniformRandomGenerator` in TensorFunctors.h for an example. + + // Custom number generator for use with setRandom(). + struct MyRandomGenerator { + // Default and copy constructors. Both are needed + MyRandomGenerator() { } + MyRandomGenerator(const MyRandomGenerator& ) { } + + // Return a random value to be used. "element_location" is the + // location of the entry to set in the tensor, it can typically + // be ignored. + Scalar operator()(Eigen::DenseIndex element_location, + Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + + // Same as above but generates several numbers at a time. + typename internal::packet_traits::type packetOp( + Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + }; + +You can also use one of the 2 random number generators that are part of the +tensor library: +* UniformRandomGenerator +* NormalRandomGenerator + + +## Data Access + +The Tensor, TensorFixedSize, and TensorRef classes provide the following +accessors to access the tensor coefficients: + + const Scalar& operator()(const array& indices) + const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + Scalar& operator()(const array& indices) + Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + +The number of indices must be equal to the rank of the tensor. Moreover, these +accessors are not available on tensor expressions. In order to access the +values of a tensor expression, the expression must either be evaluated or +wrapped in a TensorRef. + + +### Scalar* data() and const Scalar* data() const + +Returns a pointer to the storage for the tensor. The pointer is const if the +tensor was const. This allows direct access to the data. The layout of the +data depends on the tensor layout: RowMajor or ColMajor. + +This access is usually only needed for special cases, for example when mixing +Eigen Tensor code with other libraries. + +Scalar is the type of data stored in the tensor. + + Eigen::Tensor a(3, 4); + float* a_data = a.data(); + a_data[0] = 123.45f; + cout << "a(0, 0): " << a(0, 0); + => a(0, 0): 123.45 + + +## Tensor Operations + +All the methods documented below return non evaluated tensor `Operations`. +These can be chained: you can apply another Tensor Operation to the value +returned by the method. 
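+
+For example, the elementwise methods documented below can be composed into a
+single expression (a short sketch):
+
+    Eigen::Tensor<float, 3> t(2, 3, 4);
+    t.setRandom();
+    // abs(), sqrt(), the scalar multiply and exp() each return a lazy
+    // Operation; nothing is computed until the assignment to `result`.
+    Eigen::Tensor<float, 3> result = (t.abs().sqrt() * 0.5f).exp();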
+ +The chain of Operation is evaluated lazily, typically when it is assigned to a +tensor. See "Controlling when Expression are Evaluated" for more details about +their evaluation. + +### constant(const Scalar& val) + +Returns a tensor of the same type and dimensions as the original tensor but +where all elements have the value `val`. + +This is useful, for example, when you want to add or subtract a constant from a +tensor, or multiply every element of a tensor by a scalar. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.constant(2.0f); + Eigen::Tensor c = b * b.constant(0.2f); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + cout << "c" << endl << c << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 3 3 3 + 3 3 3 + + c + 0.6 0.6 0.6 + 0.6 0.6 0.6 + +### random() + +Returns a tensor of the same type and dimensions as the current tensor +but where all elements have random values. + +This is for example useful to add random values to an existing tensor. +The generation of random values can be customized in the same manner +as for `setRandom()`. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.random(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 1.68038 1.5662 1.82329 + 0.788766 1.59688 0.395103 + + +## Unary Element Wise Operations + +All these operations take a single input tensor as argument and return a tensor +of the same type and dimensions as the tensor to which they are applied. The +requested operations are applied to each element independently. + +### operator-() + +Returns a tensor of the same type and dimensions as the original tensor +containing the opposite values of the original tensor. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = -a; + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + -1 -1 -1 + -1 -1 -1 + +### sqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the square roots of the original tensor. + +### rsqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse square roots of the original tensor. + +### square() + +Returns a tensor of the same type and dimensions as the original tensor +containing the squares of the original tensor values. + +### inverse() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse of the original tensor values. + +### exp() + +Returns a tensor of the same type and dimensions as the original tensor +containing the exponential of the original tensor. + +### log() + +Returns a tensor of the same type and dimensions as the original tensor +containing the natural logarithms of the original tensor. + +### abs() + +Returns a tensor of the same type and dimensions as the original tensor +containing the absolute values of the original tensor. + +### pow(Scalar exponent) + +Returns a tensor of the same type and dimensions as the original tensor +containing the coefficients of the original tensor to the power of the +exponent. + +The type of the exponent, Scalar, is always the same as the type of the +tensor coefficients. For example, only integer exponents can be used in +conjuntion with tensors of integer values. + +You can use cast() to lift this restriction. 
For example this computes +cubic roots of an int Tensor: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 8}, {27, 64, 125}}); + Eigen::Tensor b = a.cast().pow(1.0 / 3.0); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 8 + 27 64 125 + + b + 0 1 2 + 3 4 5 + +### operator * (Scalar scale) + +Multiplies all the coefficients of the input tensor by the provided scale. + +### cwiseMax(Scalar threshold) +TODO + +### cwiseMin(Scalar threshold) +TODO + +### unaryExpr(const CustomUnaryOp& func) +TODO + + +## Binary Element Wise Operations + +These operations take two input tensors as arguments. The 2 input tensors should +be of the same type and dimensions. The result is a tensor of the same +dimensions as the tensors to which they are applied, and unless otherwise +specified it is also of the same type. The requested operations are applied to +each pair of elements independently. + +### operator+(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise sums of the inputs. + +### operator-(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise differences of the inputs. + +### operator*(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise products of the inputs. + +### operator/(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise quotients of the inputs. + +This operator is not supported for integer types. + +### cwiseMax(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise maximums of the inputs. + +### cwiseMin(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise mimimums of the inputs. + +### Logical operators + +The following logical operators are supported as well: + +* operator&&(const OtherDerived& other) +* operator||(const OtherDerived& other) +* operator<(const OtherDerived& other) +* operator<=(const OtherDerived& other) +* operator>(const OtherDerived& other) +* operator>=(const OtherDerived& other) +* operator==(const OtherDerived& other) +* operator!=(const OtherDerived& other) + +They all return a tensor of boolean values. + + +## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) + +Selection is a coefficient-wise ternary operator that is the tensor equivalent +to the if-then-else operation. + + Tensor if = ...; + Tensor then = ...; + Tensor else = ...; + Tensor result = if.select(then, else); + +The 3 arguments must be of the same dimensions, which will also be the dimension +of the result. The 'if' tensor must be of type boolean, the 'then' and the +'else' tensor must be of the same type, which will also be the type of the +result. + +Each coefficient in the result is equal to the corresponding coefficient in the +'then' tensor if the corresponding value in the 'if' tensor is true. If not, the +resulting coefficient will come from the 'else' tensor. + + +## Contraction + +Tensor *contractions* are a generalization of the matrix product to the +multidimensional case. 
+ + // Create 2 matrices using tensors of rank 2 + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + Eigen::Tensor b(3, 2); + b.setValues({{1, 2}, {4, 5}, {5, 6}}); + + // Compute the traditional matrix product + Eigen::array, 1> product_dims = { Eigen::IndexPair(1, 0) }; + Eigen::Tensor AB = a.contract(b, product_dims); + + // Compute the product of the transpose of the matrices + Eigen::array, 1> transposed_product_dims = { Eigen::IndexPair(0, 1) }; + Eigen::Tensor AtBt = a.contract(b, transposed_product_dims); + + // Contraction to scalar value using a double contraction. + // First coordinate of both tensors are contracted as well as both second coordinates, i.e., this computes the sum of the squares of the elements. + Eigen::array, 2> double_contraction_product_dims = { Eigen::IndexPair(0, 0), Eigen::IndexPair(1, 1) }; + Eigen::Tensor AdoubleContractedA = a.contract(a, double_contraction_product_dims); + + // Extracting the scalar value of the tensor contraction for further usage + int value = AdoubleContractedA(0); + +## Reduction Operations + +A *Reduction* operation returns a tensor with fewer dimensions than the +original tensor. The values in the returned tensor are computed by applying a +*reduction operator* to slices of values from the original tensor. You specify +the dimensions along which the slices are made. + +The Eigen Tensor library provides a set of predefined reduction operators such +as `maximum()` and `sum()` and lets you define additional operators by +implementing a few methods from a reductor template. + +### Reduction Dimensions + +All reduction operations take a single parameter of type +`::``Dimensions` which can always be specified as an array of +ints. These are called the "reduction dimensions." The values are the indices +of the dimensions of the input tensor over which the reduction is done. The +parameter can have at most as many element as the rank of the input tensor; +each element must be less than the tensor rank, as it indicates one of the +dimensions to reduce. + +Each dimension of the input tensor should occur at most once in the reduction +dimensions as the implementation does not remove duplicates. + +The order of the values in the reduction dimensions does not affect the +results, but the code may execute faster if you list the dimensions in +increasing order. + +Example: Reduction along one dimension. + + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + // Reduce it along the second dimension (1)... + Eigen::array dims({1 /* dimension to reduce */}); + // ...using the "maximum" operator. + // The result is a tensor with one dimension. The size of + // that dimension is the same as the first (non-reduced) dimension of a. + Eigen::Tensor b = a.maximum(dims); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 6 5 4 + + b + 3 + 6 + +Example: Reduction along two dimensions. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // The tensor a has 3 dimensions. We reduce along the + // first 2, resulting in a tensor with a single dimension + // of size 4 (the last dimension of a.) + // Note that we pass the array of reduction dimensions + // directly to the maximum() call. 
+ Eigen::Tensor b = + a.maximum(Eigen::array({0, 1})); + cout << "b" << endl << b << endl << endl; + => + b + 20 + 21 + 22 + 23 + +#### Reduction along all dimensions + +As a special case, if you pass no parameter to a reduction operation the +original tensor is reduced along *all* its dimensions. The result is a +scalar, represented as a zero-dimension tensor. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // Reduce along all dimensions using the sum() operator. + Eigen::Tensor b = a.sum(); + cout << "b" << endl << b << endl << endl; + => + b + 276 + + +### sum(const Dimensions& new_dims) +### sum() + +Reduce a tensor using the sum() operator. The resulting values +are the sum of the reduced values. + +### mean(const Dimensions& new_dims) +### mean() + +Reduce a tensor using the mean() operator. The resulting values +are the mean of the reduced values. + +### maximum(const Dimensions& new_dims) +### maximum() + +Reduce a tensor using the maximum() operator. The resulting values are the +largest of the reduced values. + +### minimum(const Dimensions& new_dims) +### minimum() + +Reduce a tensor using the minimum() operator. The resulting values +are the smallest of the reduced values. + +### prod(const Dimensions& new_dims) +### prod() + +Reduce a tensor using the prod() operator. The resulting values +are the product of the reduced values. + +### all(const Dimensions& new_dims) +### all() +Reduce a tensor using the all() operator. Casts tensor to bool and then checks +whether all elements are true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + +### any(const Dimensions& new_dims) +### any() +Reduce a tensor using the any() operator. Casts tensor to bool and then checks +whether any element is true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + + +### reduce(const Dimensions& new_dims, const Reducer& reducer) + +Reduce a tensor using a user-defined reduction operator. See `SumReducer` +in TensorFunctors.h for information on how to implement a reduction operator. + + +## Trace + +A *Trace* operation returns a tensor with fewer dimensions than the original +tensor. It returns a tensor whose elements are the sum of the elements of the +original tensor along the main diagonal for a list of specified dimensions, the +"trace dimensions". Similar to the `Reduction Dimensions`, the trace dimensions +are passed as an input parameter to the operation, are of type `::``Dimensions` +, and have the same requirements when passed as an input parameter. In addition, +the trace dimensions must have the same size. + +Example: Trace along 2 dimensions. + + // Create a tensor of 3 dimensions + Eigen::Tensor a(2, 2, 3); + a.setValues({{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}}); + // Specify the dimensions along which the trace will be computed. + // In this example, the trace can only be computed along the dimensions + // with indices 0 and 1 + Eigen::array dims({0, 1}); + // The output tensor contains all but the trace dimensions. 
+ Tensor a_trace = a.trace(dims); + cout << "a_trace:" << endl; + cout << a_trace << endl; + => + a_trace: + 11 + 13 + 15 + + +### trace(const Dimensions& new_dims) +### trace() + +As a special case, if no parameter is passed to the operation, trace is computed +along *all* dimensions of the input tensor. + +Example: Trace along all dimensions. + + // Create a tensor of 3 dimensions, with all dimensions having the same size. + Eigen::Tensor a(3, 3, 3); + a.setValues({{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, + {{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}, + {{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}); + // Result is a zero dimension tensor + Tensor a_trace = a.trace(); + cout<<"a_trace:"< + a_trace: + 42 + + +## Scan Operations + +A *Scan* operation returns a tensor with the same dimensions as the original +tensor. The operation performs an inclusive scan along the specified +axis, which means it computes a running total along the axis for a given +reduction operation. +If the reduction operation corresponds to summation, then this computes the +prefix sum of the tensor along the given axis. + +Example: +dd a comment to this line + + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {4, 5, 6}}); + // Scan it along the second dimension (1) using summation + Eigen::Tensor b = a.cumsum(1); + // The result is a tensor with the same size as the input + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 4 5 6 + + b + 1 3 6 + 4 9 15 + +### cumsum(const Index& axis) + +Perform a scan by summing consecutive entries. + +### cumprod(const Index& axis) + +Perform a scan by multiplying consecutive entries. + + +## Convolutions + +### convolve(const Kernel& kernel, const Dimensions& dims) + +Returns a tensor that is the output of the convolution of the input tensor with the kernel, +along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor +which were part of the convolution will be reduced by the formula: +output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size). +The dimension sizes for dimensions that were not part of the convolution will remain the same. +Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the +convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is +for the last dimension). + + // Compute convolution along the second and third dimension. + Tensor input(3, 3, 7, 11); + Tensor kernel(2, 2); + Tensor output(3, 2, 6, 11); + input.setRandom(); + kernel.setRandom(); + + Eigen::array dims({1, 2}); // Specify second and third dimension for convolution. + output = input.convolve(kernel, dims); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 6; ++k) { + for (int l = 0; l < 11; ++l) { + const float result = output(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + +## Geometrical Operations + +These operations return a Tensor with different dimensions than the original +Tensor. They can be used to access slices of tensors, see them with different +dimensions, or pad tensors with additional data. 
+ +### reshape(const Dimensions& new_dims) + +Returns a view of the input tensor that has been reshaped to the specified +new dimensions. The argument new_dims is an array of Index values. The +rank of the resulting tensor is equal to the number of elements in new_dims. + +The product of all the sizes in the new dimension array must be equal to +the number of elements in the input tensor. + + // Increase the rank of the input tensor by introducing a new dimension + // of size 1. + Tensor input(7, 11); + array three_dims{{7, 11, 1}}; + Tensor result = input.reshape(three_dims); + + // Decrease the rank of the input tensor by merging 2 dimensions; + array one_dim{{7 * 11}}; + Tensor result = input.reshape(one_dim); + +This operation does not move any data in the input tensor, so the resulting +contents of a reshaped Tensor depend on the data layout of the original Tensor. + +For example this is what happens when you `reshape()` a 2D ColMajor tensor +to one dimension: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +This is what happens when the 2D Tensor is RowMajor: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 100 + 200 + 300 + 400 + 500 + +The reshape operation is a lvalue. In other words, it can be used on the left +side of the assignment operator. + +The previous example can be rewritten as follow: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array two_dim({2, 3}); + Eigen::Tensor b(6); + b.reshape(two_dim) = a; + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +Note that "b" itself was not reshaped but that instead the assignment is done to +the reshape view of b. + + +### shuffle(const Shuffle& shuffle) + +Returns a copy of the input tensor whose dimensions have been +reordered according to the specified permutation. The argument shuffle +is an array of Index values. Its size is the rank of the input +tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th +dimension of the output tensor equals to the size of the shuffle[i]-th +dimension of the input tensor. For example: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output = input.shuffle({1, 2, 0}) + + eigen_assert(output.dimension(0) == 30); + eigen_assert(output.dimension(1) == 50); + eigen_assert(output.dimension(2) == 20); + +Indices into the output tensor are shuffled accordingly to formulate +indices into the input tensor. For example, one can assert in the above +code snippet that: + + eigen_assert(output(3, 7, 11) == input(11, 3, 7)); + +In general, one can assert that + + eigen_assert(output(..., indices[shuffle[i]], ...) == + input(..., indices[i], ...)) + +The shuffle operation results in a lvalue, which means that it can be assigned +to. In other words, it can be used on the left side of the assignment operator. + +Let's rewrite the previous example to take advantage of this feature: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. 
+ Tensor output(30, 50, 20); + output.shuffle({2, 0, 1}) = input; + + +### stride(const Strides& strides) + +Returns a view of the input tensor that strides (skips stride-1 +elements) along each of the dimensions. The argument strides is an +array of Index values. The dimensions of the resulting tensor are +ceil(input_dimensions[i] / strides[i]). + +For example this is what happens when you `stride()` a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array strides({3, 2}); + Eigen::Tensor b = a.stride(strides); + cout << "b" << endl << b << endl; + => + b + 0 200 + 900 1100 + +It is possible to assign a tensor to a stride: + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output(40, 90, 200); + output.stride({2, 3, 4}) = input; + + +### slice(const StartIndices& offsets, const Sizes& extents) + +Returns a sub-tensor of the given tensor. For each dimension i, the slice is +made of the coefficients stored between offset[i] and offset[i] + extents[i] in +the input tensor. + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array offsets = {1, 0}; + Eigen::array extents = {2, 2}; + Eigen::Tensor slice = a.slice(offsets, extents); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "slice" << endl << slice << endl; + => + slice + 300 400 + 600 700 + + +### chip(const Index offset, const Index dim) + +A chip is a special kind of slice. It is the subtensor at the given offset in +the dimension dim. The returned tensor has one fewer dimension than the input +tensor: the dimension dim is removed. + +For example, a matrix chip would be either a row or a column of the input +matrix. + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::Tensor row_3 = a.chip(2, 0); + Eigen::Tensor col_2 = a.chip(1, 1); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "row_3" << endl << row_3 << endl; + => + row_3 + 600 700 800 + cout << "col_2" << endl << col_2 << endl; + => + col_2 + 100 400 700 1000 + +It is possible to assign values to a tensor chip since the chip operation is a +lvalue. For example: + + Eigen::Tensor a(3); + a.setValues({{100, 200, 300}}); + Eigen::Tensor b(2, 3); + b.setZero(); + b.chip(0, 0) = a; + cout << "a" << endl << a << endl; + => + a + 100 + 200 + 300 + cout << "b" << endl << b << endl; + => + b + 100 200 300 + 0 0 0 + + +### reverse(const ReverseDimensions& reverse) + +Returns a view of the input tensor that reverses the order of the coefficients +along a subset of the dimensions. The argument reverse is an array of boolean +values that indicates whether or not the order of the coefficients should be +reversed along each of the dimensions. This operation preserves the dimensions +of the input tensor. 
+ +For example this is what happens when you `reverse()` the first dimension +of a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array reverse({true, false}); + Eigen::Tensor b = a.reverse(reverse); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + b + 900 1000 1100 + 600 700 800 + 300 400 500 + 0 100 200 + + +### broadcast(const Broadcast& broadcast) + +Returns a view of the input tensor in which the input is replicated one to many +times. +The broadcast argument specifies how many copies of the input tensor need to be +made in each of the dimensions. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array bcast({3, 2}); + Eigen::Tensor b = a.broadcast(bcast); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + +### concatenate(const OtherDerived& other, Axis axis) + +TODO + +### pad(const PaddingDimensions& padding) + +Returns a view of the input tensor in which the input is padded with zeros. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array, 2> paddings; + paddings[0] = make_pair(0, 1); + paddings[1] = make_pair(2, 3); + Eigen::Tensor b = a.pad(paddings); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 0 0 0 + 0 0 0 0 + 0 100 200 0 + 300 400 500 0 + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### extract_patches(const PatchDims& patch_dims) + +Returns a tensor of coefficient patches extracted from the input tensor, where +each patch is of dimension specified by 'patch_dims'. The returned tensor has +one greater dimension than the input tensor, which is used to index each patch. +The patch index in the output tensor depends on the data layout of the input +tensor: the patch index is the last dimension ColMajor layout, and the first +dimension in RowMajor layout. + +For example, given the following input tensor: + + Eigen::Tensor tensor(3,4); + tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f}, + {4.0f, 5.0f, 6.0f, 7.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}); + + cout << "tensor: " << endl << tensor << endl; + => + tensor: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + +Six 2x2 patches can be extracted and indexed using the following code: + + Eigen::Tensor patch; + Eigen::array patch_dims; + patch_dims[0] = 2; + patch_dims[1] = 2; + patch = tensor.extract_patches(patch_dims); + for (int k = 0; k < 6; ++k) { + cout << "patch index: " << k << endl; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + if (DataLayout == ColMajor) { + cout << patch(i, j, k) << " "; + } else { + cout << patch(k, i, j) << " "; + } + } + cout << endl; + } + } + +This code results in the following output when the data layout is ColMajor: + + patch index: 0 + 0 1 + 4 5 + patch index: 1 + 4 5 + 8 9 + patch index: 2 + 1 2 + 5 6 + patch index: 3 + 5 6 + 9 10 + patch index: 4 + 2 3 + 6 7 + patch index: 5 + 6 7 + 10 11 + +This code results in the following output when the data layout is RowMajor: +(NOTE: the set of patches is the same as in ColMajor, but are indexed differently). 
+ + patch index: 0 + 0 1 + 4 5 + patch index: 1 + 1 2 + 5 6 + patch index: 2 + 2 3 + 6 7 + patch index: 3 + 4 5 + 8 9 + patch index: 4 + 5 6 + 9 10 + patch index: 5 + 6 7 + 10 11 + +### extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const PaddingType padding_type) + +Returns a tensor of coefficient image patches extracted from the input tensor, +which is expected to have dimensions ordered as follows (depending on the data +layout of the input tensor, and the number of additional dimensions 'N'): + +*) ColMajor +1st dimension: channels (of size d) +2nd dimension: rows (of size r) +3rd dimension: columns (of size c) +4th-Nth dimension: time (for video) or batch (for bulk processing). + +*) RowMajor (reverse order of ColMajor) +1st-Nth dimension: time (for video) or batch (for bulk processing). +N+1'th dimension: columns (of size c) +N+2'th dimension: rows (of size r) +N+3'th dimension: channels (of size d) + +The returned tensor has one greater dimension than the input tensor, which is +used to index each patch. The patch index in the output tensor depends on the +data layout of the input tensor: the patch index is the 4'th dimension in +ColMajor layout, and the 4'th from the last dimension in RowMajor layout. + +For example, given the following input tensor with the following dimension +sizes: + *) depth: 2 + *) rows: 3 + *) columns: 5 + *) batch: 7 + + Tensor tensor(2,3,5,7); + Tensor tensor_row_major = tensor.swap_layout(); + +2x2 image patches can be extracted and indexed using the following code: + +*) 2D patch: ColMajor (patch indexed by second-to-last dimension) + + Tensor twod_patch; + twod_patch = tensor.extract_image_patches<2, 2>(); + // twod_patch.dimension(0) == 2 + // twod_patch.dimension(1) == 2 + // twod_patch.dimension(2) == 2 + // twod_patch.dimension(3) == 3*5 + // twod_patch.dimension(4) == 7 + +*) 2D patch: RowMajor (patch indexed by the second dimension) + + Tensor twod_patch_row_major; + twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>(); + // twod_patch_row_major.dimension(0) == 7 + // twod_patch_row_major.dimension(1) == 3*5 + // twod_patch_row_major.dimension(2) == 2 + // twod_patch_row_major.dimension(3) == 2 + // twod_patch_row_major.dimension(4) == 2 + +## Special Operations + +### cast() + +Returns a tensor of type T with the same dimensions as the original tensor. +The returned tensor contains the values of the original tensor converted to +type T. + + Eigen::Tensor a(2, 3); + Eigen::Tensor b = a.cast(); + +This can be useful for example if you need to do element-wise division of +Tensors of integers. This is not currently supported by the Tensor library +but you can easily cast the tensors to floats to do the division: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 2}, {3, 4, 5}}); + Eigen::Tensor b = + (a.cast() / a.constant(2).cast()).cast(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 2 + 3 4 5 + + b + 0 0 1 + 1 2 2 + + +### eval() + +TODO + + +## Representation of scalar values + +Scalar values are often represented by tensors of size 1 and rank 0.For example +Tensor::maximum() currently returns a Tensor. Similarly, the inner +product of 2 1d tensors (through contractions) returns a 0d tensor. + +## Limitations + +* The number of tensor dimensions is currently limited to 250 when using a + compiler that supports cxx11. It is limited to only 5 for older compilers. 
+* The IndexList class requires a cxx11 compliant compiler. You can use an + array of indices instead if you don't have access to a modern compiler. +* On GPUs only floating point values are properly tested and optimized for. +* Complex and integer values are known to be broken on GPUs. If you try to use + them you'll most likely end up triggering a static assertion failure such as + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + + diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/external/unsupported/Eigen/CXX11/src/Tensor/Tensor.h new file mode 100644 index 0000000..8cac2bb --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -0,0 +1,554 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_H + +namespace Eigen { + +/** \class Tensor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor class. + * + * The %Tensor class is the work-horse for all \em dense tensors within Eigen. + * + * The %Tensor class encompasses only dynamic-size objects so far. + * + * The first two template parameters are required: + * \tparam Scalar_ Numeric type, e.g. float, double, int or `std::complex`. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. + * Support for such operations (i.e. adding two tensors etc.) is planned. + * + * You can access elements of tensors using normal subscripting: + * + * \code + * Eigen::Tensor t(10, 10, 10, 10); + * t(0, 1, 2, 3) = 42.0; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. + * + * Some notes: + * + *
+ * <dl>
+ * <dt><b>Relation to other parts of Eigen:</b></dt>
+ * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
+ * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
+ * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
+ * class does not provide any of these features and is only available as a stand-alone class that just allows for
+ * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to
+ * change dramatically.</dd>
+ * </dl>
+ * + * \ref TopicStorageOrders + */ + +template +class Tensor : public TensorBase > +{ + public: + typedef Tensor Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), + Layout = Options_ & RowMajor ? RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + static const int Options = Options_; + static const int NumIndices = NumIndices_; + typedef DSizes Dimensions; + + protected: + TensorStorage m_storage; + +#ifdef EIGEN_HAS_SFINAE + template + struct isOfNormalIndex{ + static const bool is_array = internal::is_base_of, CustomIndices>::value; + static const bool is_int = NumTraits::IsInteger; + static const bool value = is_array | is_int; + }; +#endif + + public: + // Metadata + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + return coeff(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + return coeff(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + return coeff(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + return coeff(array(i0, i1, i2, i3, i4)); + } +#endif + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + return coeff(indices); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + return coeffRef(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + return coeffRef(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + return coeffRef(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + return coeffRef(array(i0, i1, i2, i3, i4)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + return coeffRef(indices); + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const Self& other) + : m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) + : m_storage(firstDimension, otherDimensions...) + { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) + : m_storage(dim1, array(dim1)) + { + EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2) + : m_storage(dim1*dim2, array(dim1, dim2)) + { + EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) + : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) + { + EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) + { + EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) + { + EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array& dimensions) + : m_storage(internal::array_prod(dimensions), dimensions) + { + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + #if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(Self&& other) + : m_storage(std::move(other.m_storage)) + { + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(Self&& other) + { + m_storage = std::move(other.m_storage); + return *this; + } + #endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + void resize(Index firstDimension, IndexTypes... otherDimensions) + { + // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + resize(array{{firstDimension, otherDimensions...}}); + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC void resize(const array& dimensions) + { + int i; + Index size = Index(1); + for (i = 0; i < NumIndices; i++) { + internal::check_rows_cols_for_overflow::run(size, dimensions[i]); + size *= dimensions[i]; + } + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + m_storage.resize(size, dimensions); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + m_storage.resize(size, dimensions); + #endif + } + + // Why this overload, DSizes is derived from array ??? // + EIGEN_DEVICE_FUNC void resize(const DSizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = dimensions[i]; + } + resize(dims); + } + + EIGEN_DEVICE_FUNC + void resize() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Nothing to do: rank 0 tensors have fixed size + } + +#ifdef EIGEN_HAS_INDEX_LIST + template + EIGEN_DEVICE_FUNC + void resize(const Eigen::IndexList& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#endif + + /** Custom Dimension */ +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) + { + resize(internal::customIndices2Array(dimensions)); + } +#endif + +#ifndef EIGEN_EMULATE_CXX11_META_H + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#else + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#endif + + protected: + + bool checkIndexRange(const array& indices) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return + // check whether the indices are all >= 0 + array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h new file mode 100644 index 0000000..8b8fb92 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -0,0 +1,329 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
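This new file supplies the index/value Tuple expressions that back TensorBase::argmax() and TensorBase::argmin() (declared further down, in the TensorBase.h file added by this same patch). A minimal usage sketch, assuming the vendored headers are reachable through the usual unsupported include path (the include line and variable names are illustrative only):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1.f, 5.f, 2.f},
               {4.f, 0.f, 3.f}});
  // Reducing over all dimensions yields a rank-0 tensor holding the linear index of the maximum.
  Eigen::Tensor<Eigen::DenseIndex, 0> flat = t.argmax();
  // Reducing along dimension 1 yields one index per row.
  Eigen::Tensor<Eigen::DenseIndex, 1> per_row = t.argmax(1);
  std::cout << flat() << " " << per_row(0) << " " << per_row(1) << "\n";
  return 0;
}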
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H +#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H + +namespace Eigen { +namespace internal { + +/** \class TensorIndexTuple + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor + Index Tuple class. + * + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Tuple Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorIndexTupleOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorIndexTupleOp type; +}; + +} // end namespace internal + +template +class TensorIndexTupleOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Tuple CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorIndexTupleOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return CoeffReturnType(index, m_impl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + TensorEvaluator 
m_impl; +}; + +namespace internal { + +/** \class TensorTupleIndex + * \ingroup CXX11_Tensor_Module + * + * \brief Converts to Tensor > and reduces to Tensor. + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Index Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorTupleReducerOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorTupleReducerOp type; +}; + +} // end namespace internal + +template +class TensorTupleReducerOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Index CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, + const ReduceOp& reduce_op, + const Index return_dim, + const Dims& reduce_dims) + : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const ReduceOp& reduce_op() const { return m_reduce_op; } + + EIGEN_DEVICE_FUNC + const Dims& reduce_dims() const { return m_reduce_dims; } + + EIGEN_DEVICE_FUNC + Index return_dim() const { return m_return_dim; } + + protected: + typename XprType::Nested m_xpr; + const ReduceOp m_reduce_op; + const Index m_return_dim; + const Dims m_reduce_dims; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorTupleReducerOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename TensorIndexTupleOp::CoeffReturnType TupleType; + typedef typename TensorEvaluator >, Device>::Dimensions Dimensions; + typedef typename TensorEvaluator , Device>::Dimensions InputDimensions; + static const int NumDims = internal::array_size::value; + typedef array StrideDims; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + typedef StorageMemory TupleStorageMem; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator >, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_orig_impl(op.expression(), device), + m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), + m_return_dim(op.return_dim()) + { + gen_strides(m_orig_impl.dimensions(), m_strides); + if (Layout == static_cast(ColMajor)) 
{ + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; + } else { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; + } + // If m_return_dim is not a valid index, returns 1 or this can crash on Windows. + m_stride_div = ((m_return_dim >= 0) && + (m_return_dim < static_cast(m_strides.size()))) + ? m_strides[m_return_dim] : 1; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const TupleType v = m_impl.coeff(index); + return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } +#ifdef EIGEN_USE_SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + m_orig_impl.bind(cgh); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = 1.0 + + (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost() + TensorOpCost::DivCost())); + return m_orig_impl.costPerCoeff(vectorized) + + m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); + } + + private: + EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { + if (m_return_dim < 0) { + return; // Won't be using the strides. + } + eigen_assert(m_return_dim < NumDims && + "Asking to convert index to a dimension outside of the rank"); + + // Calculate m_stride_div and m_stride_mod, which are used to + // calculate the value of an index w.r.t. the m_return_dim. + if (Layout == static_cast(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i-1] * dims[i-1]; + } + } else { + strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i+1] * dims[i+1]; + } + } + } + + protected: + TensorEvaluator, Device> m_orig_impl; + TensorEvaluator >, Device> m_impl; + const Index m_return_dim; + StrideDims m_strides; + Index m_stride_mod; + Index m_stride_div; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h new file mode 100644 index 0000000..e5811d6 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -0,0 +1,247 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H +#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H + +namespace Eigen { + +/** \class TensorAssign + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor assignment class. 
+ * + * This class is represents the assignment of the values resulting from the evaluation of + * the rhs expression to the memory locations denoted by the lhs expression. + */ +namespace internal { +template +struct traits > +{ + typedef typename LhsXprType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const std::size_t NumDimensions = internal::traits::NumDimensions; + static const int Layout = internal::traits::Layout; + typedef typename traits::PointerType PointerType; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorAssignOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorAssignOp type; +}; + +} // end namespace internal + + + +template +class TensorAssignOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename LhsXprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + static const int NumDims = Eigen::internal::traits::NumDimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + typename internal::remove_all::type& + lhsExpression() const { return *((typename internal::remove_all::type*)&m_lhs_xpr); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename internal::remove_all::type& m_lhs_xpr; + const typename internal::remove_all::type& m_rhs_xpr; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorAssignOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + static const int PacketSize = PacketType::size; + static const int NumDims = XprType::NumDims; + + enum { + IsAligned = int(TensorEvaluator::IsAligned) & + int(TensorEvaluator::IsAligned), + PacketAccess = int(TensorEvaluator::PacketAccess) & + int(TensorEvaluator::PacketAccess), + BlockAccess = int(TensorEvaluator::BlockAccess) & + int(TensorEvaluator::BlockAccess), + PreferBlockAccess = int(TensorEvaluator::PreferBlockAccess) | + int(TensorEvaluator::PreferBlockAccess), + Layout = TensorEvaluator::Layout, + RawAccess = TensorEvaluator::RawAccess + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + RightTensorBlock; + //===--------------------------------------------------------------------===// + + TensorEvaluator(const XprType& op, const Device& device) : + 
m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT( + (static_cast(TensorEvaluator::Layout) == + static_cast(TensorEvaluator::Layout)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // The dimensions of the lhs and the rhs tensors should be equal to prevent + // overflows and ensure the result is fully initialized. + // TODO: use left impl instead if right impl dimensions are known at compile time. + return m_rightImpl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + m_leftImpl.evalSubExprsIfNeeded(NULL); + // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non + // null value), attempt to evaluate the rhs expression in place. Returns true iff in place + // evaluation isn't supported and the caller still needs to manually assign the values generated + // by the rhs to the lhs. + return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) { + m_rightImpl.evalSubExprsIfNeededAsync( + m_leftImpl.data(), [done](bool need_assign) { done(need_assign); }); + }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + + const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + m_leftImpl.template writePacket(i, m_rightImpl.template packet(i)); + } + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_leftImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + return m_leftImpl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here, but reduce left + // cost by one load because we are using m_leftImpl.coeffRef. + TensorOpCost left = m_leftImpl.costPerCoeff(vectorized); + return m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost( + numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)), + left.bytes_stored(), left.compute_cycles()) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return internal::TensorBlockResourceRequirements::merge( + m_leftImpl.getResourceRequirements(), + m_rightImpl.getResourceRequirements()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock( + TensorBlockDesc& desc, TensorBlockScratch& scratch) { + if (TensorEvaluator::RawAccess && + m_leftImpl.data() != NULL) { + // If destination has raw data access, we pass it as a potential + // destination for a block descriptor evaluation. 
+ desc.template AddDestinationBuffer( + /*dst_base=*/m_leftImpl.data() + desc.offset(), + /*dst_strides=*/internal::strides(m_leftImpl.dimensions())); + } + + RightTensorBlock block = m_rightImpl.block(desc, scratch, /*root_of_expr_ast=*/true); + // If block was evaluated into a destination, there is no need to do assignment. + if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) { + m_leftImpl.writeBlock(desc, block); + } + block.cleanup(); + } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_leftImpl.bind(cgh); + m_rightImpl.bind(cgh); + } +#endif + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_leftImpl.data(); } + + private: + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +} + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h new file mode 100644 index 0000000..35b6458 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -0,0 +1,1176 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H +#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H + +// clang-format off + +namespace Eigen { + +/** \class TensorBase + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor base class. + * + * This class is the common parent of the Tensor and TensorMap class, thus + * making it possible to use either class interchangeably in expressions. + */ +#ifndef EIGEN_PARSED_BY_DOXYGEN +// FIXME Doxygen does not like the inheritance with different template parameters +// Since there is no doxygen documentation inside, we disable it for now +template +class TensorBase +{ + public: + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + // Generic nullary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + nullaryExpr(const CustomNullaryOp& func) const { + return TensorCwiseNullaryOp(derived(), func); + } + + // Coefficient-wise nullary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + constant(const Scalar& value) const { + return nullaryExpr(internal::scalar_constant_op(value)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + random() const { + return nullaryExpr(internal::UniformRandomGenerator()); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + random(const RandomGenerator& gen = RandomGenerator()) const { + return nullaryExpr(gen); + } + + // Tensor generation + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorGeneratorOp + generate(const Generator& generator) const { + return TensorGeneratorOp(derived(), generator); + } + + // Generic unary operation support. 
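All of the coefficient-wise helpers that follow route through unaryExpr(), which wraps the tensor in a lazily evaluated TensorCwiseUnaryOp; nothing is computed until the expression is assigned to a concrete tensor. A short sketch, with a user-defined functor whose name is purely illustrative:

#include <unsupported/Eigen/CXX11/Tensor>

// Hypothetical element-wise functor, applied through unaryExpr().
struct PlusOneOp {
  float operator()(float x) const { return x + 1.0f; }
};

void unary_examples() {
  Eigen::Tensor<float, 2> a(2, 2);
  a.setRandom();
  // Built-in helpers; evaluation happens on assignment.
  Eigen::Tensor<float, 2> b = a.square() + a.exp();
  // Generic hook for custom element-wise functions.
  Eigen::Tensor<float, 2> c = a.unaryExpr(PlusOneOp());
}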
+ template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp + unaryExpr(const CustomUnaryOp& func) const { + return TensorCwiseUnaryOp(derived(), func); + } + + // Coefficient-wise unary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator-() const { + return unaryExpr(internal::scalar_opposite_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sqrt() const { + return unaryExpr(internal::scalar_sqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sign() const { + return unaryExpr(internal::scalar_sign_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + rsqrt() const { + return unaryExpr(internal::scalar_rsqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + square() const { + return unaryExpr(internal::scalar_square_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + cube() const { + return unaryExpr(internal::scalar_cube_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + inverse() const { + return unaryExpr(internal::scalar_inverse_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + tanh() const { + return unaryExpr(internal::scalar_tanh_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + digamma() const { + return unaryExpr(internal::scalar_digamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_i0() const { + return unaryExpr(internal::scalar_bessel_i0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_i0e() const { + return unaryExpr(internal::scalar_bessel_i0e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_i1() const { + return unaryExpr(internal::scalar_bessel_i1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_i1e() const { + return unaryExpr(internal::scalar_bessel_i1e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_j0() const { + return unaryExpr(internal::scalar_bessel_j0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_y0() const { + return unaryExpr(internal::scalar_bessel_y0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_j1() const { + return unaryExpr(internal::scalar_bessel_j1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_y1() const { + return unaryExpr(internal::scalar_bessel_y1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_k0() const { + return unaryExpr(internal::scalar_bessel_k0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_k0e() const { + return unaryExpr(internal::scalar_bessel_k0e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_k1() const { + return 
unaryExpr(internal::scalar_bessel_k1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + bessel_k1e() const { + return unaryExpr(internal::scalar_bessel_k1e_op()); + } + + // igamma(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igamma_op()); + } + + // igamma_der_a(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igamma_der_a(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igamma_der_a_op()); + } + + // gamma_sample_der_alpha(alpha = this, sample = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + gamma_sample_der_alpha(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_gamma_sample_der_alpha_op()); + } + + // igammac(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op()); + } + + // zeta(x = this, q = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + zeta(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_zeta_op()); + } + + // polygamma(n = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + polygamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_polygamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + ndtri() const { + return unaryExpr(internal::scalar_ndtri_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sigmoid() const { + return unaryExpr(internal::scalar_logistic_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + exp() const { + return unaryExpr(internal::scalar_exp_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + expm1() const { + return unaryExpr(internal::scalar_expm1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log() const { + return unaryExpr(internal::scalar_log_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log1p() const { + return unaryExpr(internal::scalar_log1p_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log2() const { + return unaryExpr(internal::scalar_log2_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + abs() const { + return unaryExpr(internal::scalar_abs_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + clip(Scalar min, Scalar max) const { + 
return unaryExpr(internal::scalar_clamp_op(min, max)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const typename internal::conditional::IsComplex, + TensorCwiseUnaryOp, const Derived>, + Derived>::type + conjugate() const { + return choose(Cond::IsComplex>(), unaryExpr(internal::scalar_conjugate_op()), derived()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + pow(Scalar exponent) const { + return unaryExpr(internal::bind2nd_op >(exponent)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + real() const { + return unaryExpr(internal::scalar_real_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + imag() const { + return unaryExpr(internal::scalar_imag_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar rhs) const { + EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator% (Scalar rhs) const { + EIGEN_STATIC_ASSERT(NumTraits::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + return unaryExpr(internal::scalar_mod_op(rhs)); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMax(Scalar threshold) const { + return cwiseMax(constant(threshold)); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMin(Scalar threshold) const { + return cwiseMin(constant(threshold)); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const typename internal::conditional::value, + Derived, + TensorConversionOp >::type + cast() const { + return choose(Cond::value>(), derived(), TensorConversionOp(derived())); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + round() const { + return unaryExpr(internal::scalar_round_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const 
TensorCwiseUnaryOp, const Derived> + rint() const { + return unaryExpr(internal::scalar_rint_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + ceil() const { + return unaryExpr(internal::scalar_ceil_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + floor() const { + return unaryExpr(internal::scalar_floor_op()); + } + + // Generic binary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp + binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const { + return TensorCwiseBinaryOp(derived(), other, func); + } + + // Coefficient-wise binary operators. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator+(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_sum_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator-(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_difference_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator*(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_product_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator/(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_quotient_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMax(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_max_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMin(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_min_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator&&(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_and_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator||(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_or_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator^(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_xor_op()); + } + + // Comparisons and tests. 
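The comparison operators below are lazy as well: each returns an expression whose coefficients are bool, which is usually fed into select() or a boolean reduction rather than stored directly. Sketch (all names local to the example):

#include <unsupported/Eigen/CXX11/Tensor>

void comparison_examples() {
  Eigen::Tensor<float, 1> x(4), y(4);
  x.setValues({1.f, 2.f, 3.f, 4.f});
  y.setValues({4.f, 3.f, 2.f, 1.f});
  // Element-wise comparison produces a bool-valued expression.
  Eigen::Tensor<bool, 1> lt = x < y;
  // Comparison against a scalar threshold uses the overloads further down.
  Eigen::Tensor<bool, 1> big = x > 2.5f;
  // select() picks coefficients from x or y based on the condition.
  Eigen::Tensor<float, 1> smaller = (x < y).select(x, y);
}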
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator==(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator!=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + // comparisons and tests for Scalars + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<(Scalar threshold) const { + return operator<(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<=(Scalar threshold) const { + return operator<=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>(Scalar threshold) const { + return operator>(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>=(Scalar threshold) const { + return operator>=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator==(Scalar threshold) const { + return operator==(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator!=(Scalar threshold) const { + return operator!=(constant(threshold)); + } + + // Checks + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isnan)() const { + return unaryExpr(internal::scalar_isnan_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isinf)() const { + return unaryExpr(internal::scalar_isinf_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isfinite)() const { + return unaryExpr(internal::scalar_isfinite_op()); + } + + // Coefficient-wise ternary operators. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSelectOp + select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { + return TensorSelectOp(derived(), thenTensor.derived(), elseTensor.derived()); + } + + // Contractions. 
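contract() generalizes the matrix product: the list of DimensionPair values names which axis of this tensor is summed against which axis of the other operand. Contracting dimension 1 of the left tensor with dimension 0 of the right one reproduces an ordinary matrix product, as in this sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void contraction_example() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
  a.setRandom();
  b.setRandom();
  // Sum over a's dimension 1 and b's dimension 0: the result is 2x4, i.e. the matrix product a*b.
  Eigen::array<Eigen::IndexPair<int>, 1> dims = { Eigen::IndexPair<int>(1, 0) };
  Eigen::Tensor<float, 2> c = a.contract(b, dims);
}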
+ typedef Eigen::IndexPair DimensionPair; + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorContractionOp + contract(const OtherDerived& other, const Dimensions& dims) const { + return TensorContractionOp(derived(), other.derived(), dims); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorContractionOp + contract(const OtherDerived& other, const Dimensions& dims, const OutputKernel& output_kernel) const { + return TensorContractionOp(derived(), other.derived(), dims, output_kernel); + } + + // Convolutions. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConvolutionOp + convolve(const KernelDerived& kernel, const Dimensions& dims) const { + return TensorConvolutionOp(derived(), kernel.derived(), dims); + } + + // Fourier transforms + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorFFTOp + fft(const FFT& dims) const { + return TensorFFTOp(derived(), dims); + } + + // Scan. + typedef TensorScanOp, const Derived> TensorScanSumOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanSumOp + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); + } + + typedef TensorScanOp, const Derived> TensorScanProdOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanProdOp + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); + } + + // Reductions. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + sum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::SumReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + sum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::SumReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + mean(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MeanReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + mean() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MeanReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + prod(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::ProdReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + prod() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::ProdReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + maximum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); + } + + template + const TensorReductionOp, const DimensionList, const Derived> + maximum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); + } + + template EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + minimum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); + } + + template + const TensorReductionOp, const DimensionList, const Derived> + minimum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp::value, Derived, TensorConversionOp >::type > + all(const Dims& dims) const { + return cast().reduce(dims, internal::AndReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const typename internal::conditional::value, Derived, TensorConversionOp >::type > + all() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::AndReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp::value, Derived, TensorConversionOp >::type > + any(const Dims& dims) const { + return cast().reduce(dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const typename internal::conditional::value, Derived, TensorConversionOp >::type > + any() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax() const { + array in_dims; + for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin() const { + array in_dims; + for (Index d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax(const Index return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), return_dim, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin(const Index return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), return_dim, in_dims); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp + reduce(const Dims& dims, const Reducer& reducer) const { + return TensorReductionOp(derived(), dims, reducer); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTraceOp + trace(const Dims& dims) const { + return TensorTraceOp(derived(), dims); + } + + const TensorTraceOp, const Derived> + trace() const { + DimensionList in_dims; + return TensorTraceOp, const Derived>(derived(), in_dims); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorBroadcastingOp + broadcast(const Broadcast& bcast) const 
{ + return TensorBroadcastingOp(derived(), bcast); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, Axis axis) const { + return TensorConcatenationOp(derived(), other.derived(), axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPatchOp + extract_patches(const PatchDims& patch_dims) const { + return TensorPatchOp(derived(), patch_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const Index patch_rows = 1, const Index patch_cols = 1, + const Index row_stride = 1, const Index col_stride = 1, + const Index in_row_stride = 1, const Index in_col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, 1, 1, padding_type, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const Index patch_rows, const Index patch_cols, + const Index row_stride, const Index col_stride, + const Index in_row_stride, const Index in_col_stride, + const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top, const Index padding_bottom, + const Index padding_left,const Index padding_right, + const Scalar padding_value) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, + padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride, const Index row_stride, const Index col_stride, + const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top_z, const Index padding_bottom_z, + const Index padding_top, const Index padding_bottom, + const Index padding_left, const Index padding_right, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + // Morphing operators. 
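The morphing operators that follow (swap_layout, reshape, slice, chip, reverse, shuffle, ...) return expressions over the same coefficients rather than copies; a concrete tensor is only materialized when the expression is assigned. A short sketch of the most common ones:

#include <unsupported/Eigen/CXX11/Tensor>

void morphing_examples() {
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setRandom();
  // View the 2x3x4 tensor as 6x4 (the total number of coefficients must match).
  Eigen::array<Eigen::Index, 2> new_dims{{6, 4}};
  Eigen::Tensor<float, 2> flat = t.reshape(new_dims);
  // A 1x3x4 slice starting at offsets (1, 0, 0).
  Eigen::array<Eigen::Index, 3> offsets{{1, 0, 0}};
  Eigen::array<Eigen::Index, 3> extents{{1, 3, 4}};
  Eigen::Tensor<float, 3> part = t.slice(offsets, extents);
  // chip<0>(1) fixes dimension 0 at index 1, dropping the rank to 2.
  Eigen::Tensor<float, 2> page = t.chip<0>(1);
}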
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding) const { + return TensorPaddingOp(derived(), padding, internal::scalar_cast_op()(0)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding, const Scalar padding_value) const { + return TensorPaddingOp(derived(), padding, padding_value); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp(derived(), shfl); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorInflationOp + inflate(const Strides& strides) const { + return TensorInflationOp(derived(), strides); + } + + // Returns a tensor containing index/value tuples + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorIndexTupleOp + index_tuples() const { + return TensorIndexTupleOp(derived()); + } + + // Support for custom unary and binary operations + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomUnaryOp customOp(const CustomUnaryFunc& op) const { + return TensorCustomUnaryOp(derived(), op); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomBinaryOp customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { + return TensorCustomBinaryOp(derived(), other, op); + } + + // Force the evaluation of the expression. 
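eval() is mainly useful inside a larger expression: it materializes a sub-expression into a temporary once, instead of letting it be re-evaluated for every coefficient of the enclosing expression. Sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void eval_example() {
  Eigen::Tensor<float, 2> t1(64, 64), t2(64, 64);
  t1.setRandom();
  t2.setRandom();
  // (t1 + t2) is evaluated into a buffer first, then scaled and exponentiated.
  Eigen::Tensor<float, 2> result = ((t1 + t2).eval() * 0.2f).exp();
}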
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorForcedEvalOp eval() const { + return TensorForcedEvalOp(derived()); + } + + protected: + template friend class Tensor; + template friend class TensorFixedSize; + // the Eigen:: prefix is required to workaround a compilation issue with nvcc 9.0 + template friend class Eigen::TensorBase; + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; + +template::value> +class TensorBase : public TensorBase { + public: + typedef TensorBase Base; + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef Scalar CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + template friend class Tensor; + template friend class TensorFixedSize; + // the Eigen:: prefix is required to workaround a compilation issue with nvcc 9.0 + template friend class Eigen::TensorBase; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setZero() { + return setConstant(Scalar(0)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) { + return derived() = this->constant(val); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return derived() = this->random(); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return derived() = this->template random(); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setValues( + const typename internal::Initializer::InitList& vals) { + TensorEvaluator eval(derived(), DefaultDevice()); + internal::initialize_tensor(eval, vals); + return derived(); + } +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const OtherDerived& other) { + return derived() = derived() + other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const OtherDerived& other) { + return derived() = derived() - other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const OtherDerived& other) { + return derived() = derived() * other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const OtherDerived& other) { + return derived() = derived() / other.derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorLayoutSwapOp + swap_layout() { + return TensorLayoutSwapOp(derived()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) const { + return TensorConcatenationOp(derived(), other, axis); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) { + return TensorConcatenationOp(derived(), other, axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReshapingOp + reshape(const NewDimensions& newDimensions) { + return TensorReshapingOp(derived(), newDimensions); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + 
return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) { + return TensorSlicingOp(derived(), startIndices, sizes); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset) { + return TensorChippingOp(derived(), offset, DimId); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset, const Index dim) { + return TensorChippingOp(derived(), offset, dim); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReverseOp + reverse(const ReverseDimensions& rev) { + return TensorReverseOp(derived(), rev); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp(derived(), shfl); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorShufflingOp + shuffle(const Shuffle& shfl) { + return TensorShufflingOp(derived(), shfl); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingOp + stride(const Strides& strides) { + return TensorStridingOp(derived(), strides); + } + + // Select the device on which to evaluate the expression. + template + TensorDevice device(const DeviceType& dev) { + return TensorDevice(dev, derived()); + } + + // Select the async device on which to evaluate the expression. 
+ template + TensorAsyncDevice device(const DeviceType& dev, DoneCallback done) { + return TensorAsyncDevice(dev, derived(), std::move(done)); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TensorBase) + EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorBase) + + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(derived(), other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return derived(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; +#endif // EIGEN_PARSED_BY_DOXYGEN +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h new file mode 100644 index 0000000..1e55d12 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -0,0 +1,1559 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H +#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H + +namespace Eigen { +namespace internal { + +// -------------------------------------------------------------------------- // +// Forward declarations for templates defined below. +template +class TensorBlockIO; + +// -------------------------------------------------------------------------- // +// Helper function to compute strides for densely stored buffer of given +// dimensions. + +// TODO(ezhulenev): We compute strides 1000 times in different evaluators, use +// this function instead everywhere. +template +EIGEN_ALWAYS_INLINE DSizes strides( + const DSizes& dimensions) { + DSizes strides; + if (NumDims == 0) return strides; + + // TODO(ezhulenev): Use templates to unroll this loop (similar to + // h_array_reduce in CXX11meta.h)? Benchmark it. + if (static_cast(Layout) == static_cast(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i - 1] * dimensions[i - 1]; + } + } else { + strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i + 1] * dimensions[i + 1]; + } + } + + return strides; +} + +template +EIGEN_ALWAYS_INLINE DSizes strides( + const Eigen::array& dimensions) { + return strides(DSizes(dimensions)); +} + +template +EIGEN_STRONG_INLINE DSizes strides( + const Sizes& sizes) { + return strides(DSizes(sizes)); +} + +// -------------------------------------------------------------------------- // + +// Tensor block shape type defines what are the shape preference for the blocks +// extracted from the larger tensor. 
+// +// Example: blocks of 100 elements from the large 100x100 tensor: +// - tensor: 100x100 +// - target_block_size: 100 +// +// TensorBlockShapeType: +// - kUniformAllDims: 100 blocks of size 10x10 +// - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column +// or row major layout) +enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims }; + +struct TensorBlockResourceRequirements { + TensorBlockShapeType shape_type; // target block shape + size_t size; // target block size + TensorOpCost cost_per_coeff; // cost of computing a single block element + +#ifdef EIGEN_HIPCC + // For HIPCC, we need to explicitly declare as a "device fun", the constructor + // which is implicitly invoked in the "merge" / "any" routines. else HIPCC + // errors out complaining about the lack of a matching constructor + EIGEN_DEVICE_FUNC + TensorBlockResourceRequirements(TensorBlockShapeType shape_type_, size_t size_, + TensorOpCost cost_) + : shape_type(shape_type_), size(size_), cost_per_coeff(cost_) + {} +#endif + + template + EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize( + TensorBlockShapeType shape_type, size_t size_in_bytes, + TensorOpCost cost) { + const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar)); + return {shape_type, size, cost}; + } + + template + EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize( + TensorBlockShapeType shape_type, size_t size_in_bytes) { + // This default cost per coefficient is valid for most materialized tensor + // block evaluation implementations, because they typically just read + // coefficients from the underlying tensor storage, and write to the tensor + // block buffer (scratch or destination memory, reads and writes have linear + // access pattern). We ignore the fixed cost of block evaluation, because in + // practice it should negligible. + // + // Lazy block evaluation adds the cost of calling a functor for each + // coefficient. + // + // All non-trivial block evaluation implementations must provide their own + // cost approximation (e.g. shuffling inner dimension has a much higher cost + // because it reads memory randomly, although the total number of moved + // bytes is the same). + return withShapeAndSize(shape_type, size_in_bytes, + {/*bytes_loaded=*/sizeof(Scalar), + /*bytes_stored=*/sizeof(Scalar), + /*compute_cycles=*/0}); + } + + template + EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements skewed( + size_t size_in_bytes) { + return withShapeAndSize(TensorBlockShapeType::kSkewedInnerDims, + size_in_bytes); + } + + template + EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements uniform( + size_t size_in_bytes) { + return withShapeAndSize(TensorBlockShapeType::kUniformAllDims, + size_in_bytes); + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE TensorBlockResourceRequirements + merge(const TensorBlockResourceRequirements& lhs, + const TensorBlockResourceRequirements& rhs) { + return {merge(lhs.shape_type, rhs.shape_type), // shape_type + merge(lhs.size, rhs.size), // size + merge(lhs.cost_per_coeff, rhs.cost_per_coeff)}; // cost_per_coeff + } + + EIGEN_DEVICE_FUNC TensorBlockResourceRequirements& addCostPerCoeff( + TensorOpCost cost) { + cost_per_coeff += cost; + return *this; + } + + // This is a resource requirement that should be returned from expressions + // that do not have any block evaluation preference (e.g. default tensor + // expression with raw buffer access). 
+ EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE TensorBlockResourceRequirements any() { + return {TensorBlockShapeType::kUniformAllDims, 1, {0, 0, 0}}; + } + + private: + using Requirements = TensorBlockResourceRequirements; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) { + return numext::maxi(lhs_size, rhs_size); + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE TensorBlockShapeType + merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs) { + return (lhs == TensorBlockShapeType::kSkewedInnerDims || + rhs == TensorBlockShapeType::kSkewedInnerDims) + ? TensorBlockShapeType::kSkewedInnerDims + : TensorBlockShapeType::kUniformAllDims; + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost, + TensorOpCost rhs_cost) { + return lhs_cost + rhs_cost; + } +}; + +// -------------------------------------------------------------------------- // +// TensorBlockDescriptor specifies a block offset within a tensor and the block +// sizes along each of the tensor dimensions. + +template +class TensorBlockDescriptor { + public: + typedef DSizes Dimensions; + + // If we evaluate a Tensor assignment, and expression on the left, already has + // a memory buffer, then we might do performance optimization, and evaluate + // the root expression directly into the final output memory. Some time it's + // possible to reuse it for materializing subexpressions inside an expression + // tree, to to avoid dynamic memory allocation. + // + // The pointer type of the underlying storage is erased, because passing + // Scalar type through all the expression evaluation layers is way too many + // templates. In practice destination buffer type should always match the + // evaluated expression scalar type. + class DestinationBuffer { + public: + enum DestinationBufferKind : int { + // The above explicit specification of "int" as the enum basetype is + // needed to get around a HIPCC link error ("the field type is not + // amp-compatible") + // which is issued for class members with the enum type. + // TODO(rocm): + // remove the "int" basetype once HIPCC has been fixed to not error out + // in the above scenario. + + // Destination buffer is not defined (`m_data` == nullptr). + kEmpty, + + // Tensor block defined by an owning tensor block descriptor can fit + // contiguously into the destination buffer. In this case it's safe to + // materialize tensor block in the destination buffer, wrap it in a + // TensorMap, and use to build Eigen expression on top of it. + kContiguous, + + // Destination buffer strides do not match strides of the contiguously + // stored block, and it's impossible to define a TensorMap over this + // buffer. However if we are evaluating a root of an expression tree, we + // still can materialize an output into this destination, because we can + // guarantee that no one will ever access it through block API. + // + // In theory it is possible to build valid TensorStriding + // expression on top of this destination buffer, however it has + // inefficient coeff/packet access, and defeats the purpose of fast block + // evaluation API. 
+ kStrided + }; + + template + Scalar* data() const { + eigen_assert(m_data_type_size == sizeof(Scalar)); + return static_cast(m_data); + } + + const Dimensions& strides() const { return m_strides; } + const DestinationBufferKind& kind() const { return m_kind; } + + private: + friend class TensorBlockDescriptor; + + DestinationBuffer() : m_data(NULL), m_data_type_size(0), m_kind(kEmpty) {} + + template + DestinationBuffer(Scalar* data, const Dimensions& strides, + DestinationBufferKind kind) + : m_data(static_cast(data)), + m_data_type_size(sizeof(Scalar)), + m_strides(strides), + m_kind(kind) {} + + template + static DestinationBuffer make(const TensorBlockDescriptor& desc, + Scalar* data, const Dimensions& strides) { + return DestinationBuffer(data, strides, kind(desc, strides)); + } + + template + static DestinationBufferKind kind(const TensorBlockDescriptor& desc, + const Dimensions& strides) { + const Dimensions& desc_dims = desc.dimensions(); + const Dimensions& desc_strides = internal::strides(desc_dims); + for (int i = 0; i < NumDims; ++i) { + if (desc_dims[i] == 1) continue; + if (desc_strides[i] != strides[i]) return kStrided; + } + return kContiguous; + } + + // Storage pointer is type erased, to reduce template bloat, but we still + // keep the size of the underlying element type for error checking. + void* m_data; + size_t m_data_type_size; + + // Destination buffer dimensions always match the dimensions of a tensor + // block descriptor it belongs to, however strides might be different. + Dimensions m_strides; + + DestinationBufferKind m_kind; + }; + + TensorBlockDescriptor(const IndexType offset, const Dimensions& dimensions, + const DestinationBuffer& destination) + : m_offset(offset), + m_dimensions(dimensions), + m_destination(destination) {} + + TensorBlockDescriptor(const IndexType offset, const Dimensions& dimensions) + : m_offset(offset), + m_dimensions(dimensions), + m_destination(DestinationBuffer()) {} + + IndexType offset() const { return m_offset; } + const Dimensions& dimensions() const { return m_dimensions; } + IndexType dimension(int index) const { return m_dimensions[index]; } + IndexType size() const { return array_prod(m_dimensions); } + + const DestinationBuffer& destination() const { return m_destination; } + + template + void AddDestinationBuffer(Scalar* dst_base, const Dimensions& dst_strides) { + eigen_assert(dst_base != NULL); + m_destination = + DestinationBuffer::template make(*this, dst_base, dst_strides); + } + + template + void AddDestinationBuffer( + Scalar* dst_base, + const DSizes& dst_strides) { + // DSizes constructor will do index type promotion if it's safe. + AddDestinationBuffer(dst_base, Dimensions(dst_strides)); + } + + TensorBlockDescriptor& DropDestinationBuffer() { + m_destination.m_data = NULL; + m_destination.m_kind = DestinationBuffer::kEmpty; + return *this; + } + + bool HasDestinationBuffer() const { + return m_destination.kind() != DestinationBuffer::kEmpty; + } + + // Returns a copy of `*this` with updated offset. + TensorBlockDescriptor WithOffset(IndexType offset) const { + return TensorBlockDescriptor(offset, m_dimensions, m_destination); + } + + private: + // Offset and dimensions are immutable after construction. Block descriptor + // can only be mutated by adding or dropping destination. 
+ const IndexType m_offset; + const Dimensions m_dimensions; + DestinationBuffer m_destination; +}; + +// -------------------------------------------------------------------------- // +// TensorBlockMapper is responsible for iterating over the blocks of a tensor. + +template +class TensorBlockMapper { + typedef TensorBlockDescriptor BlockDescriptor; + + public: + typedef DSizes Dimensions; + + TensorBlockMapper() = default; + TensorBlockMapper(const DSizes& dimensions, + const TensorBlockResourceRequirements& requirements) + : m_tensor_dimensions(dimensions), m_requirements(requirements) { + // Compute block dimensions and the total number of blocks. + InitializeBlockDimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const { + return m_total_block_count; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const { + return m_block_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes& + blockDimensions() const { + return m_block_dimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor + blockDescriptor(IndexType block_index) const { + static const bool isColMajor = Layout == static_cast(ColMajor); + + IndexType offset = 0; + DSizes dimensions; + + if (NumDims == 0) return BlockDescriptor(offset, dimensions); + + // Iterate outer -> inner dimensions. + for (int i = NumDims - 1; i >= 0; --i) { + const int dim = isColMajor ? i : NumDims - i - 1; + + const IndexType idx = block_index / m_block_strides[dim]; + block_index -= idx * m_block_strides[dim]; + + const IndexType coord = idx * m_block_dimensions[dim]; + dimensions[dim] = numext::mini(m_tensor_dimensions[dim] - coord, + m_block_dimensions[dim]); + offset += coord * m_tensor_strides[dim]; + } + + return {offset, dimensions}; + } + + private: + void InitializeBlockDimensions() { + // Requested block shape and size. + const TensorBlockShapeType shape_type = m_requirements.shape_type; + IndexType target_block_size = + numext::maxi(1, static_cast(m_requirements.size)); + + IndexType tensor_size = m_tensor_dimensions.TotalSize(); + + // Corner case: one of the dimensions is zero. Logic below is too complex + // to handle this case on a general basis, just use unit block size. + // Note: we must not yield blocks with zero dimensions (recipe for + // overflows/underflows, divisions by zero and NaNs later). + if (tensor_size == 0) { + for (int i = 0; i < NumDims; ++i) { + m_block_dimensions[i] = 1; + } + m_total_block_count = 0; + return; + } + + // If tensor fits into a target block size, evaluate it as a single block. + if (tensor_size <= target_block_size) { + m_block_dimensions = m_tensor_dimensions; + m_total_block_count = 1; + // The only valid block index is `0`, and in this case we do not need + // to compute real strides for tensor or blocks (see blockDescriptor). + for (int i = 0; i < NumDims; ++i) { + m_tensor_strides[i] = 0; + m_block_strides[i] = 1; + } + return; + } + + static const bool isColMajor = Layout == static_cast(ColMajor); + + // Block shape skewed towards inner dimension. + if (shape_type == TensorBlockShapeType::kSkewedInnerDims) { + IndexType coeff_to_allocate = target_block_size; + + for (int i = 0; i < NumDims; ++i) { + const int dim = isColMajor ? 
i : NumDims - i - 1; + m_block_dimensions[dim] = + numext::mini(coeff_to_allocate, m_tensor_dimensions[dim]); + coeff_to_allocate = divup( + coeff_to_allocate, + numext::maxi(static_cast(1), m_block_dimensions[dim])); + } + eigen_assert(coeff_to_allocate == 1); + + } else if (shape_type == TensorBlockShapeType::kUniformAllDims) { + // Tensor will not fit within 'target_block_size' budget: calculate tensor + // block dimension sizes based on "square" dimension size target. + const IndexType dim_size_target = convert_index( + std::pow(static_cast(target_block_size), + 1.0f / static_cast(m_block_dimensions.rank()))); + + for (int i = 0; i < NumDims; ++i) { + // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it + // a multiple of the packet size. Note that reducing + // 'block_dim_size' in this manner can increase the number of + // blocks, and so will amplify any per-block overhead. + m_block_dimensions[i] = + numext::mini(dim_size_target, m_tensor_dimensions[i]); + } + + // Add any un-allocated coefficients to inner dimension(s). + IndexType total_size = m_block_dimensions.TotalSize(); + for (int i = 0; i < NumDims; ++i) { + const int dim = isColMajor ? i : NumDims - i - 1; + + if (m_block_dimensions[dim] < m_tensor_dimensions[dim]) { + const IndexType total_size_other_dims = + total_size / m_block_dimensions[dim]; + const IndexType alloc_avail = + divup(target_block_size, total_size_other_dims); + if (alloc_avail == m_block_dimensions[dim]) { + // Insufficient excess coefficients to allocate. + break; + } + m_block_dimensions[dim] = + numext::mini(m_tensor_dimensions[dim], alloc_avail); + total_size = total_size_other_dims * m_block_dimensions[dim]; + } + } + + } else { + eigen_assert(false); // unknown block shape + } + + eigen_assert(m_block_dimensions.TotalSize() >= + numext::mini(target_block_size, + m_tensor_dimensions.TotalSize())); + + // Calculate block counts by dimension and total block count. + DSizes block_count; + for (int i = 0; i < NumDims; ++i) { + block_count[i] = divup(m_tensor_dimensions[i], m_block_dimensions[i]); + } + m_total_block_count = array_prod(block_count); + + // Calculate block strides (used for enumerating blocks). + m_tensor_strides = strides(m_tensor_dimensions); + m_block_strides = strides(block_count); + } + + DSizes m_tensor_dimensions; + TensorBlockResourceRequirements m_requirements; + + DSizes m_block_dimensions; + IndexType m_total_block_count; + + DSizes m_tensor_strides; + DSizes m_block_strides; +}; + +// -------------------------------------------------------------------------- // +// TensorBlockScratchAllocator is responsible for allocating temporary buffers +// for block evaluation (output or input block materialization). Given that +// Eigen expression traversal order is deterministic, all temporary allocations +// are happening in the same order, and usually have exactly the same size. +// Scratch allocator keeps a trace of all dynamic allocations, and after the +// first block evaluation is completed, we should be able to reuse all the +// temporary buffers for the next block evaluation. + +template +class TensorBlockScratchAllocator { + public: + explicit TensorBlockScratchAllocator(const Device& device) + : m_device(device), m_allocation_index(0) {} + + ~TensorBlockScratchAllocator() { + for (size_t i = 0; i < m_allocations.size(); ++i) { + m_device.deallocate(m_allocations[i].ptr); + } + } + + void* allocate(size_t size) { + // TODO(ezhulenev): Remove when replaced with inlined vector. 
+ if (m_allocations.capacity() == 0) m_allocations.reserve(8); + + // Check if we already have an existing allocation att current index. + const int num_allocations = static_cast(m_allocations.size()); + const bool has_allocation = m_allocation_index < num_allocations; + + // Allocation index can't be larger than the number of allocations. + eigen_assert(m_allocation_index <= num_allocations); + + // If we have existing allocation, and its size is larger or equal to + // requested size, we do nothing. + + // If current allocation can't fit requested size, we deallocate it, and + // replace with a larger allocation. + if (has_allocation && m_allocations[m_allocation_index].size < size) { + m_device.deallocate(m_allocations[m_allocation_index].ptr); + m_allocations[m_allocation_index].ptr = m_device.allocate(size); + m_allocations[m_allocation_index].size = size; + } + + // Make a new allocation if we don't have and existing one. + if (!has_allocation) { + Allocation allocation; + allocation.ptr = m_device.allocate(size); + allocation.size = size; + m_allocations.push_back(allocation); + } + + eigen_assert(m_allocations[m_allocation_index].ptr != NULL); + eigen_assert(m_allocations[m_allocation_index].size >= size); + + return m_allocations[m_allocation_index++].ptr; + } + + void reset() { m_allocation_index = 0; } + + private: + struct Allocation { + void* ptr; + size_t size; + }; + + const Device& m_device; + int m_allocation_index; + // TODO(ezhulenev): This should be an inlined vector. + std::vector m_allocations; +}; + +// -------------------------------------------------------------------------- // +// TensorBlockKind represents all possible block kinds, that can be produced by +// TensorEvaluator::evalBlock function. +enum TensorBlockKind { + // Tensor block that is a lazy expression that must be assigned to a + // destination using TensorBlockAssign. + kExpr, + + // Tensor block that is a view into a memory buffer owned by an underlying + // Tensor expression (e.g. it can be a view into a Tensor buffer). + kView, + + // Tensor block that was materialized in a scratch memory buffer, allocated + // with TensorBlockScratchAllocator. This block must be copied to a + // destination, similar to a block of `kExpr` type. + kMaterializedInScratch, + + // Tensor block that was materialized directly into the final output memory + // buffer. For example if the left side of an assignment is a Tensor, we can + // directly materialize the block in the destination memory. + // + // If strides in the output buffer do not match tensor block strides, the + // Tensor expression will be invalid, and should not be used by + // TensorBlockAssign or for constructing another block expression. + kMaterializedInOutput +}; + +// -------------------------------------------------------------------------- // +// TensorBlockNotImplemented should be used to defined TensorBlock typedef in +// TensorEvaluators that do not support block evaluation. + +class TensorBlockNotImplemented { + public: + typedef void XprType; +}; + +// -------------------------------------------------------------------------- // +// XprScalar extracts Scalar type from the Eigen expressions (if expression type +// is not void). It's required to be able to define lazy block expression for +// argument types, that do not support block evaluation. 
+ +template +struct XprScalar { + typedef typename XprType::Scalar type; +}; +template <> +struct XprScalar { + typedef void type; +}; + +// -------------------------------------------------------------------------- // +// TensorMaterializedBlock is a fully evaluated block of the original tensor, +// and XprType is just a TensorMap over the data. This block type is typically +// used to materialize blocks of tensor expressions, that can't be efficiently +// represented as lazy Tensor expressions with fast coeff/packet operations, +// e.g. we materialize all broadcasts into evaluated blocks. +// +// TensorMaterializedBlock does not own its memory buffer, it's either a memory +// buffer that backs the original expression (e.g. block is just a view into a +// Tensor), or a memory buffer allocated with scratch allocator, and in this +// case the scratch allocator will deallocate it at the end of block based +// expression execution. +// +// If the block was evaluated directly into the output buffer, and strides in +// the output buffer do not match block strides, the TensorMap expression will +// be invalid, and should never be used in block assignment or any other tensor +// expression. + +template +class TensorMaterializedBlock { + public: + typedef DSizes Dimensions; + typedef TensorMap > XprType; + + TensorMaterializedBlock(TensorBlockKind kind, const Scalar* data, + const Dimensions& dimensions, bool valid_expr = true) + : m_kind(kind), + m_data(data), + m_dimensions(dimensions), + m_expr(m_data, m_dimensions), + m_valid_expr(valid_expr) { + eigen_assert(m_kind == internal::TensorBlockKind::kView || + m_kind == internal::TensorBlockKind::kMaterializedInScratch || + m_kind == internal::TensorBlockKind::kMaterializedInOutput); + } + + TensorBlockKind kind() const { return m_kind; } + // NOTE(ezhulenev): Returning XprType by value like in other block types + // causes asan failures. The theory is that XprType::Nested doesn't work + // properly for TensorMap. + const XprType& expr() const { + eigen_assert(m_valid_expr); + return m_expr; + } + const Scalar* data() const { return m_data; } + void cleanup() {} + + typedef internal::TensorBlockDescriptor TensorBlockDesc; + + // TensorMaterializedBlock can be backed by different types of storage: + // + // (1) Contiguous block of memory allocated with scratch allocator. + // (2) Contiguous block of memory reused from tensor block descriptor + // destination buffer. + // (3) Strided block of memory reused from tensor block descriptor + // destination buffer. + // + class Storage { + public: + Scalar* data() const { return m_data; } + const Dimensions& dimensions() const { return m_dimensions; } + const Dimensions& strides() const { return m_strides; } + + TensorMaterializedBlock AsTensorMaterializedBlock() const { + return TensorMaterializedBlock( + m_materialized_in_output + ? 
internal::TensorBlockKind::kMaterializedInOutput + : internal::TensorBlockKind::kMaterializedInScratch, + m_data, m_dimensions, !m_strided_storage); + } + + private: + friend class TensorMaterializedBlock; + + Storage(Scalar* data, const Dimensions& dimensions, + const Dimensions& strides, bool materialized_in_output, + bool strided_storage) + : m_data(data), + m_dimensions(dimensions), + m_strides(strides), + m_materialized_in_output(materialized_in_output), + m_strided_storage(strided_storage) {} + + Scalar* m_data; + Dimensions m_dimensions; + Dimensions m_strides; + bool m_materialized_in_output; + bool m_strided_storage; + }; + + // Creates a storage for materialized block either from the block descriptor + // destination buffer, or allocates a new buffer with scratch allocator. + template + EIGEN_STRONG_INLINE static Storage prepareStorage( + TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool allow_strided_storage = false) { + // Try to reuse destination as an output block buffer. + typedef typename TensorBlockDesc::DestinationBuffer DestinationBuffer; + + if (desc.destination().kind() == DestinationBuffer::kContiguous) { + Scalar* buffer = desc.destination().template data(); + desc.DropDestinationBuffer(); + return Storage(buffer, desc.dimensions(), + internal::strides(desc.dimensions()), + /*materialized_in_output=*/true, + /*strided_storage=*/false); + + } else if (desc.destination().kind() == DestinationBuffer::kStrided && + allow_strided_storage) { + Scalar* buffer = desc.destination().template data(); + desc.DropDestinationBuffer(); + return Storage(buffer, desc.dimensions(), desc.destination().strides(), + /*materialized_in_output=*/true, /*strided_storage=*/true); + + } else { + void* mem = scratch.allocate(desc.size() * sizeof(Scalar)); + return Storage(static_cast(mem), desc.dimensions(), + internal::strides(desc.dimensions()), + /*materialized_in_output=*/false, + /*strided_storage=*/false); + } + } + + // Creates a materialized block for the given descriptor from a memory buffer. + template + EIGEN_STRONG_INLINE static TensorMaterializedBlock materialize( + const Scalar* data, const DataDimensions& data_dims, + TensorBlockDesc& desc, TensorBlockScratch& scratch) { + eigen_assert(array_size::value == desc.dimensions().size()); + + // If a tensor block dimensions covers a contiguous block of the underlying + // memory, we can skip block buffer memory allocation, and construct a block + // from existing `data` memory buffer. + // + // Example: (RowMajor layout) + // data_dims: [11, 12, 13, 14] + // desc.dimensions(): [1, 1, 3, 14] + // + // In this case we can construct a TensorBlock starting at + // `data + desc.offset()`, with a `desc.dimensions()` block sizes. + static const bool is_col_major = Layout == ColMajor; + + // Find out how many inner dimensions have a matching size. + int num_matching_inner_dims = 0; + for (int i = 0; i < NumDims; ++i) { + int dim = is_col_major ? i : NumDims - i - 1; + if (data_dims[dim] != desc.dimensions()[dim]) break; + ++num_matching_inner_dims; + } + + // All the outer dimensions must be of size `1`, except a single dimension + // before the matching inner dimension (`3` in the example above). + bool can_use_direct_access = true; + for (int i = num_matching_inner_dims + 1; i < NumDims; ++i) { + int dim = is_col_major ? 
i : NumDims - i - 1; + if (desc.dimension(dim) != 1) { + can_use_direct_access = false; + break; + } + } + + if (can_use_direct_access) { + const Scalar* block_start = data + desc.offset(); + return TensorMaterializedBlock(internal::TensorBlockKind::kView, + block_start, desc.dimensions()); + + } else { + // Reuse destination buffer or allocate new buffer with scratch allocator. + const Storage storage = prepareStorage(desc, scratch); + + typedef internal::TensorBlockIO + TensorBlockIO; + typedef typename TensorBlockIO::Dst TensorBlockIODst; + typedef typename TensorBlockIO::Src TensorBlockIOSrc; + + TensorBlockIOSrc src(internal::strides(Dimensions(data_dims)), + data, desc.offset()); + TensorBlockIODst dst(storage.dimensions(), storage.strides(), + storage.data()); + + TensorBlockIO::Copy(dst, src); + return storage.AsTensorMaterializedBlock(); + } + } + + private: + TensorBlockKind m_kind; + const Scalar* m_data; + Dimensions m_dimensions; + XprType m_expr; + bool m_valid_expr; +}; + +// -------------------------------------------------------------------------- // +// TensorCwiseUnaryBlock is a lazy tensor expression block that applies UnaryOp +// functor to the blocks produced by the underlying Tensor expression. + +template +class TensorCwiseUnaryBlock { + static const bool NoArgBlockAccess = + internal::is_void::value; + + public: + typedef typename conditional< + NoArgBlockAccess, void, + TensorCwiseUnaryOp >:: + type XprType; + + typedef typename XprScalar::type Scalar; + + TensorCwiseUnaryBlock(const ArgTensorBlock& arg_block, const UnaryOp& functor) + : m_arg_block(arg_block), m_functor(functor) {} + + TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; } + + XprType expr() const { return XprType(m_arg_block.expr(), m_functor); } + const Scalar* data() const { return NULL; } + void cleanup() { m_arg_block.cleanup(); } + + private: + ArgTensorBlock m_arg_block; + UnaryOp m_functor; +}; + +// -------------------------------------------------------------------------- // +// TensorCwiseUnaryBlock is a lazy tensor expression block that applies BinaryOp +// functor to the blocks produced by the underlying Tensor expression. + +template +class TensorCwiseBinaryBlock { + static const bool NoArgBlockAccess = + internal::is_void::value || + internal::is_void::value; + + public: + typedef typename conditional< + NoArgBlockAccess, void, + TensorCwiseBinaryOp >::type + XprType; + + typedef typename XprScalar::type Scalar; + + TensorCwiseBinaryBlock(const LhsTensorBlock& left_block, + const RhsTensorBlock& right_block, + const BinaryOp& functor) + : m_left_block(left_block), + m_right_block(right_block), + m_functor(functor) {} + + TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; } + + XprType expr() const { + return XprType(m_left_block.expr(), m_right_block.expr(), m_functor); + } + + const Scalar* data() const { return NULL; } + + void cleanup() { + m_left_block.cleanup(); + m_right_block.cleanup(); + } + + private: + LhsTensorBlock m_left_block; + RhsTensorBlock m_right_block; + BinaryOp m_functor; +}; + +// -------------------------------------------------------------------------- // +// TensorUnaryExprBlock is a lazy tensor expression block that can construct +// an arbitrary tensor expression from a block of the underlying type (this is a +// generalization of the TensorCwiseUnaryBlock for arbitrary expressions). 
+ +template +class TensorUnaryExprBlock { + typedef typename ArgTensorBlock::XprType ArgXprType; + static const bool NoArgBlockAccess = internal::is_void::value; + + public: + typedef typename conditional< + NoArgBlockAccess, void, + typename BlockFactory::template XprType::type>::type XprType; + + typedef typename XprScalar::type Scalar; + + TensorUnaryExprBlock(const ArgTensorBlock& arg_block, + const BlockFactory& factory) + : m_arg_block(arg_block), m_factory(factory) {} + + TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; } + XprType expr() const { return m_factory.expr(m_arg_block.expr()); } + const Scalar* data() const { return NULL; } + void cleanup() { m_arg_block.cleanup(); } + + private: + ArgTensorBlock m_arg_block; + BlockFactory m_factory; +}; + +// -------------------------------------------------------------------------- // +// TensorTernaryExprBlock is a lazy tensor expression block that can construct +// an arbitrary tensor expression from three blocks of the underlying type. + +template +class TensorTernaryExprBlock { + typedef typename Arg1TensorBlock::XprType Arg1XprType; + typedef typename Arg2TensorBlock::XprType Arg2XprType; + typedef typename Arg3TensorBlock::XprType Arg3XprType; + + static const bool NoArgBlockAccess = internal::is_void::value || + internal::is_void::value || + internal::is_void::value; + + public: + typedef typename conditional< + NoArgBlockAccess, void, + typename BlockFactory::template XprType::type>::type XprType; + + typedef typename XprScalar::type Scalar; + + TensorTernaryExprBlock(const Arg1TensorBlock& arg1_block, + const Arg2TensorBlock& arg2_block, + const Arg3TensorBlock& arg3_block, + const BlockFactory& factory) + : m_arg1_block(arg1_block), + m_arg2_block(arg2_block), + m_arg3_block(arg3_block), + m_factory(factory) {} + + TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; } + XprType expr() const { + return m_factory.expr(m_arg1_block.expr(), m_arg2_block.expr(), + m_arg3_block.expr()); + } + const Scalar* data() const { return NULL; } + void cleanup() { + m_arg1_block.cleanup(); + m_arg2_block.cleanup(); + m_arg3_block.cleanup(); + } + + private: + Arg1TensorBlock m_arg1_block; + Arg2TensorBlock m_arg2_block; + Arg3TensorBlock m_arg3_block; + BlockFactory m_factory; +}; + +// -------------------------------------------------------------------------- // +// StridedLinearBufferCopy provides a method to copy data between two linear +// buffers with different strides, with optimized paths for scatter/gather. + +template +class StridedLinearBufferCopy { + typedef typename packet_traits::type Packet; + enum { + Vectorizable = packet_traits::Vectorizable, + PacketSize = packet_traits::size + }; + + public: + // Specifying linear copy kind statically gives ~30% speedup for small sizes. 
+ enum class Kind { + Linear = 0, // src_stride == 1 && dst_stride == 1 + Scatter = 1, // src_stride == 1 && dst_stride != 1 + FillLinear = 2, // src_stride == 0 && dst_stride == 1 + FillScatter = 3, // src_stride == 0 && dst_stride != 1 + Gather = 4, // dst_stride == 1 + Random = 5 // everything else + }; + + struct Dst { + Dst(IndexType o, IndexType s, Scalar* d) : offset(o), stride(s), data(d) {} + + IndexType offset; + IndexType stride; + Scalar* data; + }; + + struct Src { + Src(IndexType o, IndexType s, const Scalar* d) + : offset(o), stride(s), data(d) {} + + IndexType offset; + IndexType stride; + const Scalar* data; + }; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(const Dst& dst, + const Src& src, + const size_t count) { + Run(count, dst.offset, dst.stride, dst.data, src.offset, src.stride, + src.data); + } + + private: + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const IndexType count, const IndexType dst_offset, + const IndexType dst_stride, Scalar* EIGEN_RESTRICT dst_data, + const IndexType src_offset, const IndexType src_stride, + const Scalar* EIGEN_RESTRICT src_data) { + const Scalar* src = &src_data[src_offset]; + Scalar* dst = &dst_data[dst_offset]; + + if (!Vectorizable) { + for (Index i = 0; i < count; ++i) { + dst[i * dst_stride] = src[i * src_stride]; + } + return; + } + + const IndexType vectorized_size = count - PacketSize; + IndexType i = 0; + + if (kind == StridedLinearBufferCopy::Kind::Linear) { + // ******************************************************************** // + // Linear copy from `src` to `dst`. + const IndexType unrolled_size = count - 4 * PacketSize; + eigen_assert(src_stride == 1 && dst_stride == 1); + for (; i <= unrolled_size; i += 4 * PacketSize) { + for (int j = 0; j < 4; ++j) { + Packet p = ploadu(src + i + j * PacketSize); + pstoreu(dst + i + j * PacketSize, p); + } + } + for (; i <= vectorized_size; i += PacketSize) { + Packet p = ploadu(src + i); + pstoreu(dst + i, p); + } + for (; i < count; ++i) { + dst[i] = src[i]; + } + // ******************************************************************** // + } else if (kind == StridedLinearBufferCopy::Kind::Scatter) { + // Scatter from `src` to `dst`. + eigen_assert(src_stride == 1 && dst_stride != 1); + for (; i <= vectorized_size; i += PacketSize) { + Packet p = ploadu(src + i); + pscatter(dst + i * dst_stride, p, dst_stride); + } + for (; i < count; ++i) { + dst[i * dst_stride] = src[i]; + } + // ******************************************************************** // + } else if (kind == StridedLinearBufferCopy::Kind::FillLinear) { + // Fill `dst` with value at `*src`. + eigen_assert(src_stride == 0 && dst_stride == 1); + const IndexType unrolled_size = count - 4 * PacketSize; + Packet p = pload1(src); + for (; i <= unrolled_size; i += 4 * PacketSize) { + for (int j = 0; j < 4; ++j) { + pstoreu(dst + i + j * PacketSize, p); + } + } + for (; i <= vectorized_size; i += PacketSize) { + pstoreu(dst + i, p); + } + for (; i < count; ++i) { + dst[i] = *src; + } + // ******************************************************************** // + } else if (kind == StridedLinearBufferCopy::Kind::FillScatter) { + // Scatter `*src` into `dst`. 
+ eigen_assert(src_stride == 0 && dst_stride != 1); + Packet p = pload1(src); + for (; i <= vectorized_size; i += PacketSize) { + pscatter(dst + i * dst_stride, p, dst_stride); + } + for (; i < count; ++i) { + dst[i * dst_stride] = *src; + } + // ******************************************************************** // + } else if (kind == StridedLinearBufferCopy::Kind::Gather) { + // Gather from `src` into `dst`. + eigen_assert(dst_stride == 1); + for (; i <= vectorized_size; i += PacketSize) { + Packet p = pgather(src + i * src_stride, src_stride); + pstoreu(dst + i, p); + } + for (; i < count; ++i) { + dst[i] = src[i * src_stride]; + } + // ******************************************************************** // + } else if (kind == StridedLinearBufferCopy::Kind::Random) { + // Random. + for (; i < count; ++i) { + dst[i * dst_stride] = src[i * src_stride]; + } + } else { + eigen_assert(false); + } + } +}; + +// -------------------------------------------------------------------------- // +// TensorBlockIO copies data from `src` tensor block, to the `dst` tensor block. +// It's possible to specify src->dst dimension mapping for the copy operation. +// Dimensions of `dst` specify how many elements have to be copied, for the +// `src` we need to know only stride to navigate through source memory buffer. + +template +class TensorBlockIO { + static const bool IsColMajor = (Layout == ColMajor); + + typedef StridedLinearBufferCopy LinCopy; + + public: + typedef DSizes Dimensions; + typedef DSizes DimensionsMap; + + struct Dst { + Dst(const Dimensions& dst_dims, const Dimensions& dst_strides, Scalar* dst, + IndexType dst_offset = 0) + : dims(dst_dims), strides(dst_strides), data(dst), offset(dst_offset) {} + + Dimensions dims; + Dimensions strides; + Scalar* data; + IndexType offset; + }; + + struct Src { + Src(const Dimensions& src_strides, const Scalar* src, + IndexType src_offset = 0) + : strides(src_strides), data(src), offset(src_offset) {} + + Dimensions strides; + const Scalar* data; + IndexType offset; + }; + + // Copies data to `dst` from `src`, using provided dimensions mapping: + // + // src_dimension_index = dst_to_src_dim_map[dst_dimension_index] + // + // Returns the number of copied elements. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Copy( + const Dst& dst, const Src& src, const DimensionsMap& dst_to_src_dim_map) { + // Copy single scalar value from `src` to `dst`. + if (NumDims == 0) { + *(dst.data + dst.offset) = *(src.data + src.offset); + return 1; + } + + // Both `dst` and `src` must have contiguous innermost dimension. We also + // accept the special case with stride '0', because it's used as a trick to + // implement broadcasting. + { + int inner_dim = IsColMajor ? 0 : NumDims - 1; + EIGEN_UNUSED_VARIABLE(inner_dim); + eigen_assert(dst.strides[inner_dim] == 1 || dst.strides[inner_dim] == 0); + eigen_assert(src.strides[inner_dim] == 1 || src.strides[inner_dim] == 0); + } + + // Give a shorter name to `dst_to_src_dim_map`. + const DimensionsMap& dim_map = dst_to_src_dim_map; + + // Do not squeeze reordered inner dimensions. + int num_squeezable_dims = NumSqueezableInnerDims(dim_map); + + // NOTE: We find the innermost dimension (contiguous in memory) in the dst + // block, and we write data linearly into that dimension, reading it from + // the src. If dimensions are reordered, we might end up reading data from + // the src with `stride != 1`. 
+ // + // NOTE: Random-Read/Linear-Write can be up to ~2X faster than + // Linear-Read/Random-Write: https://stackoverflow.com/a/54935680 + + // Find the innermost dimension in the dst whose size is not 1. This is the + // effective inner dim. + int num_size_one_inner_dims = 0; + for (int i = 0; i < num_squeezable_dims; ++i) { + const int dst_dim = IsColMajor ? i : NumDims - i - 1; + if (dst.dims[dst_dim] != 1) break; + num_size_one_inner_dims++; + } + + // If all dimensions are of size 1, just copy a scalar from `src` to `dst`. + if (num_size_one_inner_dims == NumDims) { + *(dst.data + dst.offset) = *(src.data + src.offset); + return 1; + } + + // Outermost dimension in the dst with `stride == 1` (contiguous in memory). + const int dst_stride1_dim = IsColMajor + ? num_size_one_inner_dims + : NumDims - num_size_one_inner_dims - 1; + + // Dimension in the src that corresponds to the dst innermost dimension. + const int src_dim_for_dst_stride1_dim = + NumDims == 0 ? 1 : dim_map[dst_stride1_dim]; + + // Size of the innermost dimension (length of contiguous blocks of memory). + IndexType dst_inner_dim_size = NumDims == 0 ? 1 : dst.dims[dst_stride1_dim]; + + // Squeeze multiple inner dims into one if they are contiguous in `dst` and + // `src` memory, so we can do less linear copy calls. + for (int i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) { + const int dst_dim = IsColMajor ? i : NumDims - i - 1; + const IndexType dst_stride = dst.strides[dst_dim]; + const IndexType src_stride = src.strides[dim_map[dst_dim]]; + if (dst_inner_dim_size == dst_stride && dst_stride == src_stride) { + dst_inner_dim_size *= dst.dims[dst_dim]; + ++num_size_one_inner_dims; + } else { + break; + } + } + + // Setup strides to read data from `src` and write to `dst`. + IndexType input_offset = src.offset; + IndexType output_offset = dst.offset; + IndexType input_stride = + NumDims == 0 ? 1 : src.strides[src_dim_for_dst_stride1_dim]; + IndexType output_stride = NumDims == 0 ? 1 : dst.strides[dst_stride1_dim]; + + const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1; + array it; + + // Initialize block iterator state. Squeeze away any dimension of size 1. + int idx = 0; // currently initialized iterator state index + for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { + const int dst_dim = IsColMajor ? i + 1 : NumDims - i - 2; + if (dst.dims[dst_dim] == 1) continue; + + it[idx].size = dst.dims[dst_dim]; + it[idx].input_stride = src.strides[dim_map[dst_dim]]; + it[idx].output_stride = dst.strides[dst_dim]; + + it[idx].input_span = it[idx].input_stride * (it[idx].size - 1); + it[idx].output_span = it[idx].output_stride * (it[idx].size - 1); + + idx++; + } + + // Iterate copying data from src to dst. + const IndexType block_total_size = NumDims == 0 ? 
1 : dst.dims.TotalSize(); + +#define COPY_INNER_DIM(KIND) \ + IndexType num_copied = 0; \ + for (num_copied = 0; num_copied < block_total_size; \ + num_copied += dst_inner_dim_size) { \ + LinCopy::template Run( \ + typename LinCopy::Dst(output_offset, output_stride, dst.data), \ + typename LinCopy::Src(input_offset, input_stride, src.data), \ + dst_inner_dim_size); \ + \ + for (int j = 0; j < idx; ++j) { \ + if (++it[j].count < it[j].size) { \ + input_offset += it[j].input_stride; \ + output_offset += it[j].output_stride; \ + break; \ + } \ + it[j].count = 0; \ + input_offset -= it[j].input_span; \ + output_offset -= it[j].output_span; \ + } \ + } \ + return num_copied; + + if (input_stride == 1 && output_stride == 1) { + COPY_INNER_DIM(LinCopy::Kind::Linear); + } else if (input_stride == 1 && output_stride != 1) { + COPY_INNER_DIM(LinCopy::Kind::Scatter); + } else if (input_stride == 0 && output_stride == 1) { + COPY_INNER_DIM(LinCopy::Kind::FillLinear); + } else if (input_stride == 0 && output_stride != 1) { + COPY_INNER_DIM(LinCopy::Kind::FillScatter); + } else if (output_stride == 1) { + COPY_INNER_DIM(LinCopy::Kind::Gather); + } else { + COPY_INNER_DIM(LinCopy::Kind::Random); + } + +#undef COPY_INNER_DIM + } + + // Copy from `src` to `dst` with an identity src->dst dimension map. Returns + // the number of copied elements. + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexType Copy(const Dst& dst, + const Src& src) { + DimensionsMap dst_to_src_map; + for (int i = 0; i < NumDims; ++i) dst_to_src_map[i] = i; + return Copy(dst, src, dst_to_src_map); + } + + private: + struct BlockIteratorState { + BlockIteratorState() + : size(0), + count(0), + input_stride(0), + output_stride(0), + input_span(0), + output_span(0) {} + + IndexType size; + IndexType count; + IndexType input_stride; + IndexType output_stride; + IndexType input_span; + IndexType output_span; + }; + + // Compute how many inner dimensions it's allowed to squeeze when doing IO + // between two tensor blocks. It's safe to squeeze inner dimensions, only + // if they are not reordered. + static int NumSqueezableInnerDims(const DimensionsMap& dim_map) { + int num_squeezable_dims = 0; + for (int i = 0; i < NumDims; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + if (dim_map[dim] != dim) break; + num_squeezable_dims++; + } + return num_squeezable_dims; + } +}; + +// -------------------------------------------------------------------------- // +// TensorBlockAssignment assigns a block expression of type `TensorBlockExpr` to +// a Tensor block defined by `desc`, backed by a memory buffer at `target`. +// +// Currently there is no way to write from a Tensor expression to a block of +// memory, if dimensions are reordered. If you need to do that, you should +// materialize a Tensor block expression into a memory buffer, and then use +// TensorBlockIO to copy data between two memory buffers with a custom +// `target->src` dimension map (see definition above). +// +// Also currently the innermost dimension of `target` must have a stride '1' +// (contiguous in memory). This restriction could be lifted with a `pscatter`, +// but in practice it's never needed, and there is a similar TensorBlockIO +// workaround for that. +// +// TODO(ezhulenev): TensorBlockAssignment is a special case of TensorBlockIO +// where `src` is a tensor expression. Explore if it is possible to rewrite IO +// to use expressions instead of pointers, and after that TensorBlockAssignment +// will become an alias to IO. 
+template +class TensorBlockAssignment { + // We will use coeff/packet path to evaluate block expressions. + typedef TensorEvaluator + TensorBlockEvaluator; + + typedef DSizes Dimensions; + + enum { + Vectorizable = packet_traits::Vectorizable, + PacketSize = packet_traits::size + }; + + template + struct InnerDimAssign { + EIGEN_ALWAYS_INLINE static void Run(Scalar* target, IndexType count, + const Evaluator& eval, + IndexType eval_offset) { + for (IndexType i = 0; i < count; ++i) { + target[i] = eval.coeff(eval_offset + i); + } + } + }; + + template + struct InnerDimAssign { + EIGEN_ALWAYS_INLINE static void Run(Scalar* target, IndexType count, + const Evaluator& eval, + IndexType eval_offset) { + typedef typename packet_traits::type Packet; + + const IndexType unrolled_size = count - 4 * PacketSize; + const IndexType vectorized_size = count - PacketSize; + IndexType i = 0; + + for (; i <= unrolled_size; i += 4 * PacketSize) { + for (int j = 0; j < 4; ++j) { + const IndexType idx = eval_offset + i + j * PacketSize; + Packet p = eval.template packet(idx); + pstoreu(target + i + j * PacketSize, p); + } + } + + for (; i <= vectorized_size; i += PacketSize) { + Packet p = eval.template packet(eval_offset + i); + pstoreu(target + i, p); + } + + for (; i < count; ++i) { + target[i] = eval.coeff(eval_offset + i); + } + } + }; + + public: + struct Target { + Target(const Dimensions& target_dims, const Dimensions& target_strides, + Scalar* target_data, IndexType target_offset = 0) + : dims(target_dims), + strides(target_strides), + data(target_data), + offset(target_offset) {} + + Dimensions dims; + Dimensions strides; + Scalar* data; + IndexType offset; + }; + + static Target target(const Dimensions& target_dims, + const Dimensions& target_strides, Scalar* target_data, + IndexType target_offset = 0) { + return Target(target_dims, target_strides, target_data, target_offset); + } + + template + static Target target( + const DSizes& target_dims, + const DSizes& target_strides, + Scalar* target_data, IndexType target_offset = 0) { + // DSizes constructor will do index type promotion if it's safe. + return Target(Dimensions(target_dims), Dimensions(target_strides), + target_data, target_offset); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const Target& target, const TensorBlockExpr& expr) { + // Prepare evaluator for block expression. + DefaultDevice default_device; + TensorBlockEvaluator eval(expr, default_device); + + // Tensor block expression dimension should match destination dimensions. + eigen_assert(dimensions_match(target.dims, eval.dimensions())); + + static const int Layout = TensorBlockEvaluator::Layout; + static const bool is_col_major = Layout == ColMajor; + + // Initialize output inner dimension size based on a layout. + const IndexType output_size = NumDims == 0 ? 1 : target.dims.TotalSize(); + const int inner_dim_idx = is_col_major ? 0 : NumDims - 1; + IndexType output_inner_dim_size = target.dims[inner_dim_idx]; + + // Target inner dimension stride must be '1'. + eigen_assert(target.strides[inner_dim_idx] == 1); + + // Squeeze multiple inner dims into one if they are contiguous in `target`. + IndexType num_squeezed_dims = 0; + for (Index i = 1; i < NumDims; ++i) { + const Index dim = is_col_major ? i : NumDims - i - 1; + const IndexType target_stride = target.strides[dim]; + + if (output_inner_dim_size == target_stride) { + output_inner_dim_size *= target.dims[dim]; + num_squeezed_dims++; + } else { + break; + } + } + + // Initialize output block iterator state. 
Dimension in this array are + // always in inner_most -> outer_most order (col major layout). + array it; + + int idx = 0; // currently initialized iterator state index + for (Index i = num_squeezed_dims; i < NumDims - 1; ++i) { + const Index dim = is_col_major ? i + 1 : NumDims - i - 2; + + it[idx].count = 0; + it[idx].size = target.dims[dim]; + it[idx].output_stride = target.strides[dim]; + it[idx].output_span = it[idx].output_stride * (it[idx].size - 1); + idx++; + } + + // We read block expression from the beginning, and start writing data to + // `target` at given offset. + IndexType input_offset = 0; + IndexType output_offset = target.offset; + + // Iterate copying data from `eval` to `target`. + for (IndexType i = 0; i < output_size; i += output_inner_dim_size) { + // Assign to `target` at current offset. + InnerDimAssign::Run(target.data + output_offset, + output_inner_dim_size, eval, + input_offset); + + // Move input offset forward by the number of assigned coefficients. + input_offset += output_inner_dim_size; + + // Update index. + for (int j = 0; j < idx; ++j) { + if (++it[j].count < it[j].size) { + output_offset += it[j].output_stride; + break; + } + it[j].count = 0; + output_offset -= it[j].output_span; + } + } + } + + private: + struct BlockIteratorState { + BlockIteratorState() + : count(0), size(0), output_stride(0), output_span(0) {} + + IndexType count; + IndexType size; + IndexType output_stride; + IndexType output_span; + }; +}; + +// -------------------------------------------------------------------------- // + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h new file mode 100644 index 0000000..a354132 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -0,0 +1,1093 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H +#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H + +namespace Eigen { + +/** \class TensorBroadcasting + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor broadcasting class. 
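+ *
+ * Illustrative usage (hypothetical sizes, shown only as a sketch of the API):
+ *
+ *   Eigen::Tensor<float, 2> in(2, 3);
+ *   Eigen::array<Eigen::Index, 2> bcast = {2, 2};
+ *   Eigen::Tensor<float, 2> out = in.broadcast(bcast);  // 4x6, tiles `in` twice per dim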
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorBroadcastingOp EIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorBroadcastingOp type; +}; + +template +struct is_input_scalar { + static const bool value = false; +}; +template <> +struct is_input_scalar > { + static const bool value = true; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct is_input_scalar > { + static const bool value = (Sizes::total_size == 1); +}; +#endif + +} // end namespace internal + + + +template +class TensorBroadcastingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) + : m_xpr(expr), m_broadcast(broadcast) {} + + EIGEN_DEVICE_FUNC + const Broadcast& broadcast() const { return m_broadcast; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Broadcast m_broadcast; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorBroadcastingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout; + bool isCopy, nByOne, oneByN; + public: + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + // We do block based broadcasting using a trick with 2x tensor rank and 0 + // strides. See block method implementation for details. 
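+  //
+  // Illustrative sketch (hypothetical sizes): broadcasting a ColMajor 2x3
+  // input by factors {2, 2} produces a 4x6 output with
+  //
+  //   output(i, j) == input(i % 2, j % 3)
+  //
+  // The per-coefficient paths below (coeffColMajor/coeffRowMajor) compute that
+  // modulo directly; the block path instead describes each repeated copy with
+  // an extra dimension whose input stride is 0, so whole inner runs can be
+  // copied without any per-coefficient division or modulo.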
+ typedef DSizes BroadcastDimensions; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : isCopy(false), nByOne(false), oneByN(false), + m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device) + { + + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar + // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // tensor with N >= 1 of 1 element first and then broadcast. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + const InputDimensions& input_dims = m_impl.dimensions(); + isCopy = true; + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i] * m_broadcast[i]; + if (m_broadcast[i] != 1) { + isCopy = false; + } + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + m_inputStrides[NumDims-1] = 1; + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + + if (input_dims[0] == 1) { + oneByN = true; + for (int i = 1; i < NumDims; ++i) { + if (m_broadcast[i] != 1) { + oneByN = false; + break; + } + } + } else if (input_dims[NumDims-1] == 1) { + nByOne = true; + for (int i = 0; i < NumDims-1; ++i) { + if (m_broadcast[i] != 1) { + nByOne = false; + break; + } + } + } + + // Handle special format like NCHW, its input shape is '[1, N..., 1]' and + // broadcast shape is '[N, 1..., N]' + if (!oneByN && !nByOne) { + if (input_dims[0] == 1 && input_dims[NumDims-1] == 1 && NumDims > 2) { + nByOne = true; + oneByN = true; + for (int i = 1; i < NumDims-1; ++i) { + if (m_broadcast[i] != 1) { + nByOne = false; + oneByN = false; + break; + } + } + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const + { + if (internal::is_input_scalar::type>::value) { + return m_impl.coeff(0); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + if (isCopy) { + return m_impl.coeff(index); + } else { + return coeffColMajor(index); + } + } else { + if (isCopy) { + return m_impl.coeff(index); + } else { + return coeffRowMajor(index); + } + } + } + + // TODO: attempt to speed this up. 
The integer divisions and modulo are slow + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const { + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + inputIndex += index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[0]); + } + } + return inputIndex; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const + { + return m_impl.coeff(indexColMajor(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const { + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(NumDims - 1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims - 1]); + inputIndex += index; + } else { + if (internal::index_statically_eq(NumDims - 1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[NumDims - 1]); + } + } + return inputIndex; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const + { + return m_impl.coeff(indexRowMajor(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const + { + if (internal::is_input_scalar::type>::value) { + return internal::pset1(m_impl.coeff(0)); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + if (isCopy) { + #ifdef EIGEN_GPU_COMPILE_PHASE + // See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing + // unaligned loads here. The reason is unclear though. + return m_impl.template packet(index); + #else + return m_impl.template packet(index); + #endif + } else if (oneByN && !nByOne) { + return packetNByOne(index); + } else if (!oneByN && nByOne) { + return packetOneByN(index); + } else if (oneByN && nByOne) { + return packetOneByNByOne(index); + } else { + return packetColMajor(index); + } + } else { + if (isCopy) { + #ifdef EIGEN_GPU_COMPILE_PHASE + // See above. 
+ return m_impl.template packet(index); + #else + return m_impl.template packet(index); + #endif + } else if (oneByN && !nByOne) { + return packetOneByN(index); + } else if (!oneByN && nByOne) { + return packetNByOne(index); + } else if (oneByN && nByOne) { + return packetOneByNByOne(index); + } else { + return packetRowMajor(index); + } + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne + (Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + Index startDim, endDim; + Index inputIndex, outputOffset, batchedIndex; + + if (static_cast(Layout) == static_cast(ColMajor)) { + startDim = NumDims - 1; + endDim = 1; + } else { + startDim = 0; + endDim = NumDims - 2; + } + + batchedIndex = index % m_outputStrides[startDim]; + inputIndex = batchedIndex / m_outputStrides[endDim]; + outputOffset = batchedIndex % m_outputStrides[endDim]; + + if (outputOffset + PacketSize <= m_outputStrides[endDim]) { + values[0] = m_impl.coeff(inputIndex); + return internal::pload1(values); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) { + if (outputOffset + cur < m_outputStrides[endDim]) { + values[i] = m_impl.coeff(inputIndex); + } else { + ++inputIndex; + inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex); + values[i] = m_impl.coeff(inputIndex); + outputOffset = 0; + cur = 0; + } + } + return internal::pload(values); + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + Index dim, inputIndex; + + if (static_cast(Layout) == static_cast(ColMajor)) { + dim = NumDims - 1; + } else { + dim = 0; + } + + inputIndex = index % m_inputStrides[dim]; + if (inputIndex + PacketSize <= m_inputStrides[dim]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + if (inputIndex > m_inputStrides[dim]-1) { + inputIndex = 0; + } + values[i] = m_impl.coeff(inputIndex++); + } + return internal::pload(values); + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + Index dim, inputIndex, outputOffset; + + if (static_cast(Layout) == static_cast(ColMajor)) { + dim = 1; + } else { + dim = NumDims - 2; + } + + inputIndex = index / m_outputStrides[dim]; + outputOffset = index % m_outputStrides[dim]; + if (outputOffset + PacketSize <= m_outputStrides[dim]) { + values[0] = m_impl.coeff(inputIndex); + return internal::pload1(values); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) { + if (outputOffset + cur < m_outputStrides[dim]) { + values[i] = m_impl.coeff(inputIndex); + } else { + values[i] = m_impl.coeff(++inputIndex); + outputOffset = 0; + cur = 0; + } + } + return internal::pload(values); + } + } + + // Ignore the LoadMode and always use unaligned loads since we can't guarantee + // the alignment at compile time. 
+ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[0]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. + if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + EIGEN_UNROLL_LOOP + for (int i = 1; i < PacketSize; ++i) { + if (innermostLoc + i < m_impl.dimensions()[0]) { + values[i] = m_impl.coeff(inputIndex+i); + } else { + values[i] = coeffColMajor(originalIndex+i); + } + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[NumDims-1]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. 
+ if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + EIGEN_UNROLL_LOOP + for (int i = 1; i < PacketSize; ++i) { + if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) { + values[i] = m_impl.coeff(inputIndex+i); + } else { + values[i] = coeffRowMajor(originalIndex+i); + } + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double compute_cost = TensorOpCost::AddCost(); + if (!isCopy && NumDims > 0) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + compute_cost += TensorOpCost::DivCost(); + if (internal::index_statically_eq(i, 1)) { + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else { + if (!internal::index_statically_eq(i, 1)) { + compute_cost += TensorOpCost::MulCost() + + TensorOpCost::ModCost() + + TensorOpCost::AddCost(); + } + } + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } + } + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large + // tensors. But this might need further tuning. + const size_t target_size = m_device.firstLevelCacheSize(); + return internal::TensorBlockResourceRequirements::merge( + m_impl.getResourceRequirements(), + internal::TensorBlockResourceRequirements::skewed(target_size)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + BlockBroadcastingParams params = blockBroadcastingParams(desc); + + if (params.inner_dim_size == 0 || params.bcast_dim_size == 0) { + return emptyBlock(); + } + + // Prepare storage for the materialized broadcasting result. + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage(desc, scratch); + ScalarNoConst* materialized_output = block_storage.data(); + + // We potentially will need to materialize input blocks. + size_t materialized_input_size = 0; + ScalarNoConst* materialized_input = NULL; + + // Initialize block broadcating iterator state for outer dimensions (outer + // with regard to bcast dimension). Dimension in this array are always in + // inner_most -> outer_most order (col major layout). + array it; + int idx = 0; + + for (int i = params.inner_dim_count + 1; i < NumDims; ++i) { + const Index dim = IsColMajor ? i : NumDims - 1 - i; + it[idx].size = params.output_dims[dim]; + it[idx].count = 0; + it[idx].output_stride = m_outputStrides[dim]; + it[idx].output_span = it[idx].output_stride * (it[idx].size - 1); + idx++; + } + + // Write output into the beginning of `materialized_output`. + Index output_offset = 0; + + // We will fill output block by broadcasting along the bcast dim, and + // iterating over outer dimension. + const Index output_size = NumDims == 0 ? 1 : params.output_dims.TotalSize(); + + for (Index num_output_coeffs = 0; num_output_coeffs < output_size;) { + ScalarNoConst* bcast_output = materialized_output + num_output_coeffs; + Index bcast_offset = desc.offset() + output_offset; + + // Broadcast along the bcast dimension. 
+ num_output_coeffs += BroadcastBlockAlongBcastDim( + params, bcast_offset, scratch, bcast_output, &materialized_input, + &materialized_input_size); + + // Switch to the next outer dimension. + for (int j = 0; j < idx; ++j) { + if (++it[j].count < it[j].size) { + output_offset += it[j].output_stride; + break; + } + it[j].count = 0; + output_offset -= it[j].output_span; + } + } + + return block_storage.AsTensorMaterializedBlock(); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind( + cl::sycl::handler& cgh) const { + m_impl.bind(cgh); + } +#endif + private: + static const bool IsColMajor = + static_cast(Layout) == static_cast(ColMajor); + + // We will build a general case block broadcasting on top of broadcasting + // primitive that will do broadcasting only for the inner dimension(s) along + // the first dimension smaller than the input size (it's called `bcast_dim`). + // + // Example: + // dim: 0 1 2 (ColMajor) + // input size: [9, 3, 6] + // block size: [9, 2, 6] + // + // We will compute broadcasted block by iterating over the outer dimensions + // before `bcast_dim` (only dimension `2` in this example) and computing + // broadcasts along the `bcast_dim` (dimension `1` in this example). + + // BlockBroadcastingParams holds precomputed parameters for broadcasting a + // single block along the broadcasting dimension. Sizes and strides along the + // `bcast_dim` might be invalid, they will be adjusted later in + // `BroadcastBlockAlongBcastDim`. + struct BlockBroadcastingParams { + Dimensions input_dims; // input expression dimensions + Dimensions output_dims; // output block sizes + Dimensions output_strides; // output block strides + + int inner_dim_count; // count inner dimensions matching in size + int bcast_dim; // broadcasting dimension index + Index bcast_dim_size; // broadcasting dimension size + Index inner_dim_size; // inner dimensions size + + // Block sizes and strides for the input block where all dimensions before + // `bcast_dim` are equal to `1`. + Dimensions input_block_sizes; + Dimensions input_block_strides; + + // Block sizes and strides for blocks with extra dimensions and strides `0`. + BroadcastDimensions bcast_block_sizes; + BroadcastDimensions bcast_block_strides; + BroadcastDimensions bcast_input_strides; + }; + + struct BlockBroadcastingIteratorState { + Index size; + Index count; + Index output_stride; + Index output_span; + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockBroadcastingParams + blockBroadcastingParams(TensorBlockDesc& desc) const { + BlockBroadcastingParams params; + + params.input_dims = Dimensions(m_impl.dimensions()); + + // Output block sizes and strides. + params.output_dims = desc.dimensions(); + params.output_strides = internal::strides(params.output_dims); + + // Find the broadcasting dimension (first dimension with output size smaller + // that the input size). + params.bcast_dim = 0; + params.bcast_dim_size = 1; + params.inner_dim_size = 1; + + // Count the number of inner dimensions that have the same size in the block + // and in the broadcast expression. + params.inner_dim_count = 0; + + for (int i = 0; i < NumDims; ++i) { + const int dim = IsColMajor ? 
i : NumDims - i - 1; + + if (params.output_dims[dim] == m_dimensions[dim]) { + params.inner_dim_size *= params.output_dims[dim]; + ++params.inner_dim_count; + continue; + } + + // First non-matching dimension is the broadcasting dimension. + eigen_assert(params.output_dims[dim] < m_dimensions[dim]); + params.bcast_dim = dim; + params.bcast_dim_size = params.output_dims[dim]; + break; + } + + // Calculate the input block size for looking into the input. + for (int i = 0; i < params.inner_dim_count; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + params.input_block_sizes[dim] = params.input_dims[dim]; + } + for (int i = params.inner_dim_count; i < NumDims; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + params.input_block_sizes[dim] = 1; + } + params.input_block_strides = + internal::strides(params.input_block_sizes); + + // Broadcast with the 0-stride trick: Create 1 extra dim for each + // broadcast, set the input stride to 0. + // + // When ColMajor: + // + // - bcast_block_sizes: + // [d_0, b_0, d_1, b_1, ...] + // + // - bcast_block_strides: + // [output_block_strides[0], output_block_strides[0] * d_0, + // output_block_strides[1], output_block_strides[1] * d_1, + // ...] + // + // - bcast_input_strides: + // [input_block_strides[0], 0, + // input_block_strides[1], 0, + // ...]. + // + for (int i = 0; i < params.inner_dim_count; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + + const int copy_dim = IsColMajor ? 2 * i : 2 * NumDims - 2 * i - 1; + const int broadcast_dim = IsColMajor ? copy_dim + 1 : copy_dim - 1; + + params.bcast_block_sizes[copy_dim] = params.input_dims[dim]; + params.bcast_block_sizes[broadcast_dim] = m_broadcast[dim]; + params.bcast_block_strides[copy_dim] = params.output_strides[dim]; + params.bcast_block_strides[broadcast_dim] = + params.output_strides[dim] * params.input_dims[dim]; + params.bcast_input_strides[copy_dim] = params.input_block_strides[dim]; + params.bcast_input_strides[broadcast_dim] = 0; + } + + for (int i = 2 * params.inner_dim_count; i < 2 * NumDims; ++i) { + const int dim = IsColMajor ? i : 2 * NumDims - i - 1; + params.bcast_block_sizes[dim] = 1; + params.bcast_block_strides[dim] = 0; + params.bcast_input_strides[dim] = 0; + } + + return params; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock emptyBlock() const { + DSizes dimensions; + for (int i = 0; i < NumDims; ++i) dimensions[i] = 0; + return TensorBlock(internal::TensorBlockKind::kView, NULL, dimensions); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockAlongBcastDim( + BlockBroadcastingParams params, Index bcast_offset, + TensorBlockScratch& scratch, ScalarNoConst* materialized_output, + ScalarNoConst** materialized_input, + size_t* materialized_input_size) const { + if (params.bcast_dim_size == 1) { + // We just need one block read using the ready-set values above. + return BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, 0, scratch, + materialized_output, materialized_input, materialized_input_size); + + } else if (params.input_dims[params.bcast_dim] == 1) { + // Broadcast bcast dimension (< NumDims) by bcast_dim_size. + const int broadcast_bcast_dim = + IsColMajor ? 
2 * params.inner_dim_count + 1 + : 2 * NumDims - 2 * params.inner_dim_count - 2; + + params.bcast_block_sizes[broadcast_bcast_dim] = params.bcast_dim_size; + params.bcast_input_strides[broadcast_bcast_dim] = 0; + params.bcast_block_strides[broadcast_bcast_dim] = + params.output_strides[params.bcast_dim]; + + return BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, 0, scratch, + materialized_output, materialized_input, materialized_input_size); + + } else { + // Keep track of the total number of the coefficients written to the + // output block. + Index num_output_coeffs = 0; + + // The general case. Let's denote the output block as + // + // x[..., a:a+bcast_dim_size, :, ..., :] + // + // where a:a+bcast_dim_size is a slice on the bcast_dim dimension + // (< NumDims). We need to split the a:a+bcast_dim_size into possibly 3 + // sub-blocks: + // + // (1) a:b, where b is the smallest multiple of + // input_dims[bcast_dim_start] in [a, a+bcast_dim_size]. + // + // (2) b:c, where c is the largest multiple of input_dims[bcast_dim_start] + // in [a, a+bcast_dim_size]. + // + // (3) c:a+bcast_dim_size . + // + // Or, when b and c do not exist, we just need to process the whole block + // together. + + // Find a. + const Index bcast_dim_left_index = + bcast_offset / m_outputStrides[params.bcast_dim]; + + // Find b and c. + const Index input_bcast_dim_size = params.input_dims[params.bcast_dim]; + + // First multiple after a. This is b when <= bcast_dim_left_index + + // bcast_dim_size. + const Index first_multiple = + divup(bcast_dim_left_index, input_bcast_dim_size) * + input_bcast_dim_size; + + if (first_multiple <= bcast_dim_left_index + params.bcast_dim_size) { + // b exists, so does c. Find it. + const Index last_multiple = + (bcast_dim_left_index + params.bcast_dim_size) / + input_bcast_dim_size * input_bcast_dim_size; + const int copy_bcast_dim = + IsColMajor ? 2 * params.inner_dim_count + : 2 * NumDims - 2 * params.inner_dim_count - 1; + const int broadcast_bcast_dim = + IsColMajor ? 
2 * params.inner_dim_count + 1 + : 2 * NumDims - 2 * params.inner_dim_count - 2; + + if (first_multiple > bcast_dim_left_index) { + const Index head_size = first_multiple - bcast_dim_left_index; + params.input_block_sizes[params.bcast_dim] = head_size; + params.bcast_block_sizes[copy_bcast_dim] = head_size; + params.bcast_input_strides[copy_bcast_dim] = + params.input_block_strides[params.bcast_dim]; + params.bcast_block_strides[copy_bcast_dim] = + params.output_strides[params.bcast_dim]; + params.bcast_block_sizes[broadcast_bcast_dim] = 1; + params.bcast_input_strides[broadcast_bcast_dim] = 0; + params.bcast_block_strides[broadcast_bcast_dim] = + params.output_strides[params.bcast_dim] * + params.input_dims[params.bcast_dim]; + + num_output_coeffs += BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, 0, scratch, + materialized_output, materialized_input, materialized_input_size); + } + if (first_multiple < last_multiple) { + params.input_block_sizes[params.bcast_dim] = input_bcast_dim_size; + params.bcast_block_sizes[copy_bcast_dim] = input_bcast_dim_size; + params.bcast_input_strides[copy_bcast_dim] = + params.input_block_strides[params.bcast_dim]; + params.bcast_block_strides[copy_bcast_dim] = + params.output_strides[params.bcast_dim]; + params.bcast_block_sizes[broadcast_bcast_dim] = + (last_multiple - first_multiple) / input_bcast_dim_size; + params.bcast_input_strides[broadcast_bcast_dim] = 0; + params.bcast_block_strides[broadcast_bcast_dim] = + params.output_strides[params.bcast_dim] * + params.input_dims[params.bcast_dim]; + const Index offset = (first_multiple - bcast_dim_left_index) * + m_outputStrides[params.bcast_dim]; + + num_output_coeffs += BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, offset, scratch, + materialized_output, materialized_input, materialized_input_size); + } + if (last_multiple < bcast_dim_left_index + params.bcast_dim_size) { + const Index tail_size = + bcast_dim_left_index + params.bcast_dim_size - last_multiple; + params.input_block_sizes[params.bcast_dim] = tail_size; + params.bcast_block_sizes[copy_bcast_dim] = tail_size; + params.bcast_input_strides[copy_bcast_dim] = + params.input_block_strides[params.bcast_dim]; + params.bcast_block_strides[copy_bcast_dim] = + params.output_strides[params.bcast_dim]; + params.bcast_block_sizes[broadcast_bcast_dim] = 1; + params.bcast_input_strides[broadcast_bcast_dim] = 0; + params.bcast_block_strides[broadcast_bcast_dim] = + params.output_strides[params.bcast_dim] * + params.input_dims[params.bcast_dim]; + const Index offset = (last_multiple - bcast_dim_left_index) * + m_outputStrides[params.bcast_dim]; + + num_output_coeffs += BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, offset, scratch, + materialized_output, materialized_input, materialized_input_size); + } + } else { + // b and c do not exist. + const int copy_bcast_dim = + IsColMajor ? 
2 * params.inner_dim_count + : 2 * NumDims - 2 * params.inner_dim_count - 1; + params.input_block_sizes[params.bcast_dim] = params.bcast_dim_size; + params.bcast_block_sizes[copy_bcast_dim] = params.bcast_dim_size; + params.bcast_input_strides[copy_bcast_dim] = + params.input_block_strides[params.bcast_dim]; + params.bcast_block_strides[copy_bcast_dim] = + params.output_strides[params.bcast_dim]; + + num_output_coeffs += BroadcastBlock( + params.input_block_sizes, params.input_block_strides, + params.bcast_block_sizes, params.bcast_block_strides, + params.bcast_input_strides, bcast_offset, 0, scratch, + materialized_output, materialized_input, materialized_input_size); + } + + return num_output_coeffs; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlock( + const Dimensions& input_block_sizes, + const Dimensions& input_block_strides, + const BroadcastDimensions& bcast_block_sizes, + const BroadcastDimensions& bcast_block_strides, + const BroadcastDimensions& bcast_input_strides, Index bcast_offset, + Index offset, TensorBlockScratch& scratch, + ScalarNoConst* materialized_output, ScalarNoConst** materialized_input, + size_t* materialized_input_size) const { + // ---------------------------------------------------------------------- // + // Tensor block descriptor for reading block from the input. + const Index input_offset = bcast_offset + offset; + TensorBlockDesc input_desc( + IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset), + input_block_sizes); + + ArgTensorBlock input_block = m_impl.block(input_desc, scratch); + + // ---------------------------------------------------------------------- // + // Materialize input block into a temporary memory buffer only if it's not + // already available in the arg block. + const ScalarNoConst* input_buffer = NULL; + + if (input_block.data() != NULL) { + // Input block already has raw data, there is no need to materialize it. + input_buffer = input_block.data(); + + } else { + // Otherwise we have to do block assignment into a temporary buffer. + + // Maybe reuse previously allocated buffer, or allocate a new one with a + // scratch allocator. + const size_t input_total_size = input_block_sizes.TotalSize(); + if (*materialized_input == NULL || + *materialized_input_size < input_total_size) { + *materialized_input_size = input_total_size; + void* mem = scratch.allocate(*materialized_input_size * sizeof(Scalar)); + *materialized_input = static_cast(mem); + } + + typedef internal::TensorBlockAssignment< + ScalarNoConst, NumDims, typename ArgTensorBlock::XprType, Index> + TensorBlockAssignment; + + TensorBlockAssignment::Run( + TensorBlockAssignment::target(input_block_sizes, input_block_strides, + *materialized_input), + input_block.expr()); + + input_buffer = *materialized_input; + } + + // ---------------------------------------------------------------------- // + // Copy data from materialized input block to the materialized output, using + // given broadcast strides (strides with zeroes). 
+ typedef internal::TensorBlockIO + TensorBlockIO; + + typename TensorBlockIO::Src src(bcast_input_strides, input_buffer); + typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides, + materialized_output + offset); + + return TensorBlockIO::Copy(dst, src); + } + +protected: + const Device EIGEN_DEVICE_REF m_device; + const typename internal::remove_reference::type m_broadcast; + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h new file mode 100644 index 0000000..3764573 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -0,0 +1,518 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H + +namespace Eigen { + +/** \class TensorKChippingReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. + * + * + */ + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - 1; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorChippingOp EIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorChippingOp type; +}; + +template +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { + EIGEN_UNUSED_VARIABLE(dim); + eigen_assert(dim == DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return DimId; + } +}; +template <> +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { + eigen_assert(dim >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return actual_dim; + } + private: + const DenseIndex actual_dim; +}; + + +} // end namespace internal + + + +template +class TensorChippingOp : public TensorBase > +{ + public: + typedef TensorBase > Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) + : m_xpr(expr), m_offset(offset), m_dim(dim) { + } + + EIGEN_DEVICE_FUNC + const Index offset() const { return m_offset; } + EIGEN_DEVICE_FUNC + const Index dim() const { return 
m_dim.actualDim(); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp) + + protected: + typename XprType::Nested m_xpr; + const Index m_offset; + const internal::DimensionId m_dim; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets. + IsAligned = false, + Layout = TensorEvaluator::Layout, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + // Chipping of outer-most dimension is a trivial operation, because we can + // read and write directly from the underlying tensor using single offset. + IsOuterChipping = (static_cast(Layout) == ColMajor && DimId == NumInputDims - 1) || + (static_cast(Layout) == RowMajor && DimId == 0), + // Chipping inner-most dimension. + IsInnerChipping = (static_cast(Layout) == ColMajor && DimId == 0) || + (static_cast(Layout) == RowMajor && DimId == NumInputDims - 1), + // Prefer block access if the underlying expression prefers it, otherwise + // only if chipping is not trivial. + PreferBlockAccess = TensorEvaluator::PreferBlockAccess || + !IsOuterChipping, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef internal::TensorBlockDescriptor + ArgTensorBlockDesc; + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) + { + EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(NumInputDims > m_dim.actualDim()); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); + + int j = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (i != m_dim.actualDim()) { + m_dimensions[j] = input_dims[i]; + ++j; + } + } + + m_stride = 1; + m_inputStride = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < m_dim.actualDim(); ++i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } else { + for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } + m_inputStride *= input_dims[m_dim.actualDim()]; + m_inputOffset = m_stride * op.offset(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } 
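+
+  // Illustrative sketch (hypothetical sizes): chipping fixes one dimension at
+  // a given offset and removes it from the rank, e.g.
+  //
+  //   Eigen::Tensor<float, 3> t(4, 5, 6);
+  //   Eigen::Tensor<float, 2> slice = t.chip(2, /*dim=*/1);  // 4x6 slice at offset 2
+  //
+  // srcCoeff() below maps each index of the 4x6 result back to the
+  // corresponding coefficient of the 4x5x6 input.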
+ + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if (isInnerChipping()) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + Index inputIndex = index * m_inputStride + m_inputOffset; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = m_impl.coeff(inputIndex); + inputIndex += m_inputStride; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } else if (isOuterChipping()) { + // m_stride is always greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + return m_impl.template packet(index + m_inputOffset); + } else { + const Index idx = index / m_stride; + const Index rem = index - idx * m_stride; + if (rem + PacketSize <= m_stride) { + Index inputIndex = idx * m_inputStride + m_inputOffset + rem; + return m_impl.template packet(inputIndex); + } else { + // Cross the stride boundary. Fallback to slow path. + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index); + ++index; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double cost = 0; + if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == NumInputDims - 1)) { + cost += TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == NumInputDims - 1) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == 0)) { + cost += TensorOpCost::AddCost(); + } else { + cost += 3 * TensorOpCost::MulCost() + TensorOpCost::DivCost() + + 3 * TensorOpCost::AddCost(); + } + + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + const size_t target_size = m_device.lastLevelCacheSize(); + return internal::TensorBlockResourceRequirements::merge( + internal::TensorBlockResourceRequirements::skewed(target_size), + m_impl.getResourceRequirements()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool root_of_expr_ast = false) const { + const Index chip_dim = m_dim.actualDim(); + + DSizes input_block_dims; + for (int i = 0; i < NumInputDims; ++i) { + input_block_dims[i] + = i < chip_dim ? desc.dimension(i) + : i > chip_dim ? desc.dimension(i - 1) + : 1; + } + + ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims); + + // Try to reuse destination buffer for materializing argument block. 
+ if (desc.HasDestinationBuffer()) { + DSizes arg_destination_strides; + for (int i = 0; i < NumInputDims; ++i) { + arg_destination_strides[i] + = i < chip_dim ? desc.destination().strides()[i] + : i > chip_dim ? desc.destination().strides()[i - 1] + : 0; // for dimensions of size `1` stride should never be used. + } + + arg_desc.template AddDestinationBuffer( + desc.destination().template data(), + arg_destination_strides); + } + + ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast); + if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer(); + + if (arg_block.data() != NULL) { + // Forward argument block buffer if possible. + return TensorBlock(arg_block.kind(), arg_block.data(), + desc.dimensions()); + + } else { + // Assign argument block expression to a buffer. + + // Prepare storage for the materialized chipping result. + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage(desc, scratch); + + typedef internal::TensorBlockAssignment< + ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index> + TensorBlockAssignment; + + TensorBlockAssignment::Run( + TensorBlockAssignment::target( + arg_desc.dimensions(), + internal::strides(arg_desc.dimensions()), + block_storage.data()), + arg_block.expr()); + + return block_storage.AsTensorMaterializedBlock(); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { + typename Storage::Type result = constCast(m_impl.data()); + if (isOuterChipping() && result) { + return result + m_inputOffset; + } else { + return NULL; + } + } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex; + if (isInnerChipping()) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + inputIndex = index * m_inputStride + m_inputOffset; + } else if (isOuterChipping()) { + // m_stride is always greater than index, so let's avoid the integer + // division. 
+ eigen_assert(m_stride > index); + inputIndex = index + m_inputOffset; + } else { + const Index idx = index / m_stride; + inputIndex = idx * m_inputStride + m_inputOffset; + index -= idx * m_stride; + inputIndex += index; + } + return inputIndex; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const { + return IsInnerChipping || + (static_cast(Layout) == ColMajor && m_dim.actualDim() == 0) || + (static_cast(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const { + return IsOuterChipping || + (static_cast(Layout) == ColMajor && m_dim.actualDim() == NumInputDims-1) || + (static_cast(Layout) == RowMajor && m_dim.actualDim() == 0); + } + + Dimensions m_dimensions; + Index m_stride; + Index m_inputOffset; + Index m_inputStride; + TensorEvaluator m_impl; + const internal::DimensionId m_dim; + const Device EIGEN_DEVICE_REF m_device; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::RawAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + + if (this->isInnerChipping()) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(this->m_stride == 1); + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + Index inputIndex = index * this->m_inputStride + this->m_inputOffset; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + this->m_impl.coeffRef(inputIndex) = values[i]; + inputIndex += this->m_inputStride; + } + } else if (this->isOuterChipping()) { + // m_stride is always greater than index, so let's avoid the integer division. + eigen_assert(this->m_stride > index); + this->m_impl.template writePacket(index + this->m_inputOffset, x); + } else { + const Index idx = index / this->m_stride; + const Index rem = index - idx * this->m_stride; + if (rem + PacketSize <= this->m_stride) { + const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; + this->m_impl.template writePacket(inputIndex, x); + } else { + // Cross stride boundary. Fallback to slow path. 
+ EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + this->coeffRef(index) = values[i]; + ++index; + } + } + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlockDesc& desc, const TensorBlock& block) { + assert(this->m_impl.data() != NULL); + + const Index chip_dim = this->m_dim.actualDim(); + + DSizes input_block_dims; + for (int i = 0; i < NumInputDims; ++i) { + input_block_dims[i] = i < chip_dim ? desc.dimension(i) + : i > chip_dim ? desc.dimension(i - 1) + : 1; + } + + typedef TensorReshapingOp, + const typename TensorBlock::XprType> + TensorBlockExpr; + + typedef internal::TensorBlockAssignment + TensorBlockAssign; + + TensorBlockAssign::Run( + TensorBlockAssign::target( + input_block_dims, + internal::strides(this->m_impl.dimensions()), + this->m_impl.data(), this->srcCoeff(desc.offset())), + block.expr().reshape(input_block_dims)); + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h new file mode 100644 index 0000000..5235a8e --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -0,0 +1,377 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H + +namespace Eigen { + +/** \class TensorConcatenationOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor concatenation class. + * + * + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. 
+ typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; + typedef typename conditional::val, + typename traits::PointerType, typename traits::PointerType>::type PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConcatenationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConcatenationOp type; +}; + +} // end namespace internal + + +template +class TensorConcatenationOp : public TensorBase, WriteAccessors> +{ + public: + typedef TensorBase, WriteAccessors> Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorConcatenationOp) + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Axis m_axis; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConcatenationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + static const int RightNumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess && + TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + 
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + eigen_assert(0 <= m_axis && m_axis < NumDims); + const Dimensions& lhs_dims = m_leftImpl.dimensions(); + const Dimensions& rhs_dims = m_rightImpl.dimensions(); + { + int i = 0; + for (; i < m_axis; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. + eigen_assert(rhs_dims[i] > 0); + m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; + for (++i; i < NumDims; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_leftStrides[0] = 1; + m_rightStrides[0] = 1; + m_outputStrides[0] = 1; + + for (int j = 1; j < NumDims; ++j) { + m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1]; + m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1]; + m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1]; + } + } else { + m_leftStrides[NumDims - 1] = 1; + m_rightStrides[NumDims - 1] = 1; + m_outputStrides[NumDims - 1] = 1; + + for (int j = NumDims - 2; j >= 0; --j) { + m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1]; + m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1]; + m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) + { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void cleanup() + { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. + // See CL/76180724 comments for more ideas. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Collect dimension-wise indices (subs). 
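+    // Illustrative sketch (shapes hypothetical): concatenating a 2x3 lhs with
+    // a 2x5 rhs along axis 1 in ColMajor gives output dims {2, 8} and
+    // m_outputStrides = {1, 2}. For linear index 9: subs[1] = 9 / 2 = 4 and
+    // subs[0] = 9 - 4*2 = 1. Since subs[1] = 4 >= left_dims[1] = 3, the
+    // coefficient is read from the rhs with subs[1] -= 3, i.e. rhs element
+    // (1, 1) at right_index = 1 + 1*2 = 3.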
+ array subs; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[NumDims - 1] = index; + } + + const Dimensions& left_dims = m_leftImpl.dimensions(); + if (subs[m_axis] < left_dims[m_axis]) { + Index left_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + left_index = subs[0]; + EIGEN_UNROLL_LOOP + for (int i = 1; i < NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } else { + left_index = subs[NumDims - 1]; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 2; i >= 0; --i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } + return m_leftImpl.coeff(left_index); + } else { + subs[m_axis] -= left_dims[m_axis]; + const Dimensions& right_dims = m_rightImpl.dimensions(); + Index right_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + right_index = subs[0]; + EIGEN_UNROLL_LOOP + for (int i = 1; i < NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } else { + right_index = subs[NumDims - 1]; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 2; i >= 0; --i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } + return m_rightImpl.coeff(right_index); + } + } + + // TODO(phli): Add a real vectorization. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = PacketType::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost() + + TensorOpCost::ModCost()); + const double lhs_size = m_leftImpl.dimensions().TotalSize(); + const double rhs_size = m_rightImpl.dimensions().TotalSize(); + return (lhs_size / (lhs_size + rhs_size)) * + m_leftImpl.costPerCoeff(vectorized) + + (rhs_size / (lhs_size + rhs_size)) * + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_leftImpl.bind(cgh); + m_rightImpl.bind(cgh); + } + #endif + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_leftStrides; + array m_rightStrides; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Axis m_axis; +}; + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorConcatenationOp XprType; + typedef typename Base::Dimensions Dimensions; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess && + TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = 
TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) + : Base(op, device) + { + EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + // Collect dimension-wise indices (subs). + array subs; + for (int i = Base::NumDims - 1; i > 0; --i) { + subs[i] = index / this->m_outputStrides[i]; + index -= subs[i] * this->m_outputStrides[i]; + } + subs[0] = index; + + const Dimensions& left_dims = this->m_leftImpl.dimensions(); + if (subs[this->m_axis] < left_dims[this->m_axis]) { + Index left_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; + } + return this->m_leftImpl.coeffRef(left_index); + } else { + subs[this->m_axis] -= left_dims[this->m_axis]; + const Dimensions& right_dims = this->m_rightImpl.dimensions(); + Index right_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; + } + return this->m_rightImpl.coeffRef(right_index); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + const int packetSize = PacketType::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + for (int i = 0; i < packetSize; ++i) { + coeffRef(index+i) = values[i]; + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h new file mode 100644 index 0000000..8b35f79 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -0,0 +1,1023 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H + +namespace Eigen { + +/** \class TensorContraction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor contraction class. + * + * + */ +namespace internal { + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. 
+ typedef typename gebp_traits::type, + typename remove_const::type>::ResScalar Scalar; + + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + + // From NumDims below. + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; + static const int Layout = traits::Layout; + typedef typename conditional::val, + typename traits::PointerType, + typename traits::PointerType>::type + PointerType; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorContractionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorContractionOp type; +}; + +template +struct traits, Device_> > { + typedef Indices_ Indices; + typedef LeftArgType_ LeftArgType; + typedef RightArgType_ RightArgType; + typedef OutputKernelType_ OutputKernelType; + typedef Device_ Device; + + // From NumDims below. + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; +}; + +// Helper class to allocate and deallocate temporary memory for packed buffers. +template +struct TensorContractionBlockMemAllocator { + typedef void* BlockMemHandle; + + template + EIGEN_DEVICE_FUNC static BlockMemHandle allocate(Device& d, const Index bm, + const Index bk, + const Index bn, + LhsScalar** lhs_block, + RhsScalar** rhs_block) { + eigen_assert(lhs_block); + eigen_assert(rhs_block); + BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn); + char* block_mem = static_cast(d.allocate(sz.lhs_size + sz.rhs_size)); + eigen_assert(block_mem); + *lhs_block = reinterpret_cast(block_mem); + *rhs_block = reinterpret_cast(block_mem + sz.lhs_size); + return block_mem; + } + + template + EIGEN_DEVICE_FUNC static BlockMemHandle allocateSlices( + Device& d, const Index bm, const Index bk, const Index bn, + const Index num_lhs, const Index num_rhs, const Index num_slices, + std::vector* lhs_blocks, + std::vector* rhs_blocks) { + eigen_assert(num_slices > 0); + eigen_assert(num_lhs >= 0 && num_rhs >= 0); + eigen_assert(num_lhs == 0 || lhs_blocks); + eigen_assert(num_rhs == 0 || rhs_blocks); + BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn); + void* block_mem = d.allocate( + (num_lhs * sz.lhs_size + num_rhs * sz.rhs_size) * num_slices); + eigen_assert(block_mem); + char* mem = static_cast(block_mem); + + for (Index x = 0; x < num_slices; x++) { + if (num_lhs > 0) lhs_blocks[x].resize(num_lhs); + for (Index m = 0; m < num_lhs; m++) { + lhs_blocks[x][m] = reinterpret_cast(mem); + mem += sz.lhs_size; + } + if (num_rhs > 0) rhs_blocks[x].resize(num_rhs); + for (Index n = 0; n < num_rhs; n++) { + rhs_blocks[x][n] = reinterpret_cast(mem); + mem += sz.rhs_size; + } + } + + return block_mem; + } + + template + EIGEN_DEVICE_FUNC static void deallocate(Device& d, BlockMemHandle handle) { + d.deallocate(handle); + } + + private: + struct BlockSizes { + Index lhs_size; + Index rhs_size; + }; + EIGEN_DEVICE_FUNC static BlockSizes ComputeLhsRhsBlockSizes(const Index bm, + const Index bk, + const Index bn) { + Index align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); + BlockSizes sz; + sz.lhs_size = divup(bm * bk * sizeof(LhsScalar), align) * align; + sz.rhs_size = divup(bn * bk 
* sizeof(RhsScalar), align) * align; + return sz; + } +}; + +// WARNING: In this code we assume that Lhs and Rhs tensor expressions are in +// ColMajor storage order. This property is guaranteed by the +// TensorContractionOp evaluator. TensorContractionKernel specifies how we pack +// blocks of Lhs and Rhs tensor expressions, and how we invoke matrix +// multiplication for these blocks. Default tensor contraction uses +// gemm_pack_rhs, gemm_pack_lhs and gebp_kernel from Eigen Core (see +// GeneralBlocPanelKernel.h for details). +// +// By specializing contraction kernels we can use other low level libraries to +// perform matrix multiplication, and still rely on Eigen contraction evaluator. +// This also includes full support in TensorContractionThreadPool, assuming that +// underlying gemm do not use it's own threading. +// +// - ResScalar/LhsScalar/RhsScalar - scalar type for the result of +// multiplication, lhs tensor and rhs tensor respectively. +// +// - StorageIndex - index type for the tensor expressions. In practice almost +// always is Eigen::Index. +// +// - OutputMapper provides access to the memory of the output matrix. In +// practice it's always column major blas_data_mapper (it must be of ResScalar +// type). +// +// - LhsMapper/RhsMapper similarly to blas_data_mapper provide a two dimensional +// view into the Lhs/Rhs tensor expressions. In practice it's +// TensorContractionInputMapper, or some specialization of it based on the +// type of tensor expression (e.g. TensorImagePatchOp has optimized input +// mapper). +template +struct TensorContractionKernel { + // True if `invoke()` supports `beta` in `C <- alpha * A * B + beta * C` + // (otherwise beta should be always equal to 1). + enum { HasBeta = false }; + + EIGEN_DEVICE_FUNC + TensorContractionKernel(StorageIndex m_, StorageIndex k_, StorageIndex n_, + StorageIndex bm_, StorageIndex bk_, StorageIndex bn_) + : m(m_), k(k_), n(n_), bm(bm_), bk(bk_), bn(bn_) {} + + // Pack blocks of Lhs and Rhs into contiguous blocks in memory. + typedef LhsScalar* LhsBlock; + typedef RhsScalar* RhsBlock; + + // Packed Lhs/Rhs block memory allocator. 
+ typedef TensorContractionBlockMemAllocator + BlockMemAllocator; + typedef typename BlockMemAllocator::BlockMemHandle BlockMemHandle; + + typedef typename internal::gebp_traits Traits; + + typedef internal::gemm_pack_lhs< + LhsScalar, StorageIndex, typename LhsMapper::SubMapper, Traits::mr, + Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor> + LhsPacker; + + typedef internal::gemm_pack_rhs + RhsPacker; + + typedef internal::gebp_kernel + GebpKernel; + + template + EIGEN_DEVICE_FUNC BlockMemHandle allocate(Device& d, LhsBlock* lhs_block, + RhsBlock* rhs_block) { + return BlockMemAllocator::allocate(d, bm, bk, bn, lhs_block, rhs_block); + } + + template + EIGEN_DEVICE_FUNC BlockMemHandle allocateSlices( + Device& d, const StorageIndex num_lhs, const StorageIndex num_rhs, + const StorageIndex num_slices, std::vector* lhs_blocks, + std::vector* rhs_blocks) { + return BlockMemAllocator::allocateSlices( + d, bm, bk, bn, num_lhs, num_rhs, num_slices, lhs_blocks, rhs_blocks); + } + + template + EIGEN_DEVICE_FUNC static void deallocate(Device& d, BlockMemHandle handle) { + BlockMemAllocator::deallocate(d, handle); + } + + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packLhs( + LhsBlock* lhsBlock, const typename LhsMapper::SubMapper& data_mapper, + const StorageIndex depth, const StorageIndex rows) { + LhsPacker()(*lhsBlock, data_mapper, depth, rows, /*stride*/ 0, + /*offset*/ 0); + } + + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packRhs( + RhsBlock* rhsBlock, const typename RhsMapper::SubMapper& data_mapper, + const StorageIndex depth, const StorageIndex cols) { + RhsPacker()(*rhsBlock, data_mapper, depth, cols); + } + + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void invoke( + const OutputMapper& output_mapper, const LhsBlock& lhsBlock, + const RhsBlock& rhsBlock, const StorageIndex rows, + const StorageIndex depth, const StorageIndex cols, + const ResScalar alpha, const ResScalar beta) { + // Default GEBP kernel does not support beta. + eigen_assert(beta == ResScalar(1)); + static const int kComputeStrideFromBlockDimensions = -1; + GebpKernel()(output_mapper, lhsBlock, rhsBlock, rows, depth, cols, alpha, + /*strideA*/ kComputeStrideFromBlockDimensions, + /*strideB*/ kComputeStrideFromBlockDimensions, + /*offsetA*/ 0, /*offsetB*/ 0); + } + + private: + // These are dimensions of the original Tensors, and selected block sizes. The + // actual block sizes passed to all function above might be smaller because of + // the partial blocks at the end. + const StorageIndex m; + const StorageIndex k; + const StorageIndex n; + const StorageIndex bm; + const StorageIndex bk; + const StorageIndex bn; +}; + +} // end namespace internal + +// Tensor contraction params that should enable to get from output matrix +// 2-dimensional coordinates to the output tensor dimensions. +struct TensorContractionParams { + // TensorContraction evaluator assumes that both tensors are in ColMajor + // layout, if tensors are in RowMajor evaluator swap lhs with rhs. + bool swapped_arguments; +}; + +// Output kernel allows to fuse operations into the tensor contraction. +// +// Examples: +// 1. Elementwise Relu transformation following Conv2D. +// 2. AddBias to the Conv2D output channels dimension. +// +// The NoOpOutputKernel implements an output kernel that does absolutely nothing. +struct NoOpOutputKernel { + /** + * Tensor contraction evaluator calls this kernel after finishing each block + * of output matrix. Output blocks belong to the 2-dimensional output tensor. 
+ * + * TensorContractionParams contains contraction dimensions information + * required to map output 2-d space into the expected output tensor space + * (potentially higher dimensional). + * + * \param[in] output_mapper Access to output tensor memory + * \param[in] params Tensor contraction parameters + * \param[in] i Index of a first row available through output_mapper + * \param[in] j Index of a first column available through output_mapper + * \param[in] num_rows Number of available rows + * \param[in] num_cols Number of available columns + */ + template + EIGEN_ALWAYS_INLINE void operator()( + const internal::blas_data_mapper& output_mapper, + const TensorContractionParams& params, Index i, + Index j, Index num_rows, Index num_cols) const { + EIGEN_UNUSED_VARIABLE(output_mapper); + EIGEN_UNUSED_VARIABLE(params); + EIGEN_UNUSED_VARIABLE(i); + EIGEN_UNUSED_VARIABLE(j); + EIGEN_UNUSED_VARIABLE(num_rows); + EIGEN_UNUSED_VARIABLE(num_cols); + } +}; + +template +class TensorContractionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::gebp_traits::ResScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( + const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims, + const OutputKernelType& output_kernel = OutputKernelType()) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims), + m_output_kernel(output_kernel) {} + + EIGEN_DEVICE_FUNC + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + EIGEN_DEVICE_FUNC + const OutputKernelType& outputKernel() const { return m_output_kernel; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Indices m_indices; + const OutputKernelType m_output_kernel; +}; + + +template +struct TensorContractionEvaluatorBase : internal::no_assignment_operator +{ + typedef typename internal::traits::Indices Indices; + typedef typename internal::traits::LeftArgType LeftArgType; + typedef typename internal::traits::RightArgType RightArgType; + typedef typename internal::traits::OutputKernelType OutputKernelType; + typedef typename internal::traits::Device Device; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = true, + PacketAccess = (PacketType::size > 1), + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + // Most of the code is assuming that both input tensors are ColMajor. 
If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + typedef TensorEvaluator LeftEvaluatorType; + typedef TensorEvaluator RightEvaluatorType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + typedef DSizes Dimensions; + + EIGEN_STRONG_INLINE + TensorContractionEvaluatorBase(const XprType& op, const Device& device) + : m_leftImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.lhsExpression(), op.rhsExpression()), device), + m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.rhsExpression(), op.lhsExpression()), device), + m_device(device), + m_output_kernel(op.outputKernel()), + m_result(NULL) { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == + static_cast(TensorEvaluator::Layout)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + + DSizes eval_left_dims; + DSizes eval_right_dims; + array, ContractDims> eval_op_indices; + if (static_cast(Layout) == static_cast(ColMajor)) { + // For ColMajor, we keep using the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[i]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[i]; + } + // We keep the pairs of contracting indices. + for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = op.indices()[i].first; + eval_op_indices[i].second = op.indices()[i].second; + } + } else { + // For RowMajor, we need to reverse the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; + } + // We need to flip all the pairs of contracting indices as well as + // reversing the dimensions. + for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; + eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; + } + } + + // Check for duplicate axes and make sure the first index in eval_op_indices + // is increasing. 
Using O(n^2) sorting is OK since ContractDims is small + for (int i = 0; i < ContractDims; i++) { + for (int j = i + 1; j < ContractDims; j++) { + eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && + eval_op_indices[j].second != eval_op_indices[i].second && + "contraction axes should be unique"); + if (eval_op_indices[j].first < eval_op_indices[i].first) { + numext::swap(eval_op_indices[j], eval_op_indices[i]); + } + } + } + + array lhs_strides; + lhs_strides[0] = 1; + for (int i = 0; i < LDims-1; ++i) { + lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; + } + + array rhs_strides; + rhs_strides[0] = 1; + for (int i = 0; i < RDims-1; ++i) { + rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; + } + + if (m_i_strides.size() > 0) m_i_strides[0] = 1; + if (m_j_strides.size() > 0) m_j_strides[0] = 1; + if (m_k_strides.size() > 0) m_k_strides[0] = 1; + + m_i_size = 1; + m_j_size = 1; + m_k_size = 1; + + // To compute the dimension, we simply concatenate the non-contracting + // dimensions of the left and then the right tensor. Additionally, we also + // compute the strides corresponding to the left non-contracting + // dimensions and right non-contracting dimensions. + m_lhs_inner_dim_contiguous = true; + int dim_idx = 0; + Index nocontract_idx = 0; + + for (int i = 0; i < LDims; i++) { + // find if we are contracting on index i of left tensor + bool contracting = false; + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].first == i) { + contracting = true; + break; + } + } + if (!contracting) { + // add dimension size to output dimensions + m_dimensions[dim_idx] = eval_left_dims[i]; + m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; + if (dim_idx != i) { + m_lhs_inner_dim_contiguous = false; + } + if (nocontract_idx+1 < internal::array_size::value) { + m_i_strides[nocontract_idx+1] = + m_i_strides[nocontract_idx] * eval_left_dims[i]; + } else { + m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; + } + dim_idx++; + nocontract_idx++; + } + } + + nocontract_idx = 0; + for (int i = 0; i < RDims; i++) { + bool contracting = false; + // find if we are contracting on index i of right tensor + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].second == i) { + contracting = true; + break; + } + } + if (!contracting) { + m_dimensions[dim_idx] = eval_right_dims[i]; + if (nocontract_idx+1 < internal::array_size::value) { + m_j_strides[nocontract_idx+1] = + m_j_strides[nocontract_idx] * eval_right_dims[i]; + } else { + m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; + } + m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; + dim_idx++; + nocontract_idx++; + } + } + + // Now compute the strides corresponding to the contracting dimensions. We + // assumed above that non-contracting axes are represented in the same order + // in the matrix as they are in the tensor. This is not the case for + // contracting axes. As the contracting axes must be of the same size in + // each tensor, we'll only look at the first tensor here. 
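+    // Illustrative sketch (shapes hypothetical): contracting a ColMajor
+    // rank-3 lhs of dims {I, J, K} with a rank-2 rhs of dims {K, L} over the
+    // index pair (2, 0) gives lhs_strides = {1, I, I*J} and rhs_strides = {1, K},
+    // hence m_left_contracting_strides[0] = I*J, m_right_contracting_strides[0] = 1,
+    // m_left_nocontract_strides = {1, I}, m_right_nocontract_strides = {K},
+    // and the output tensor has dims {I, J, L}.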
+ m_rhs_inner_dim_contiguous = true; + m_rhs_inner_dim_reordered = false; + for (int i = 0; i < ContractDims; i++) { + Index left = eval_op_indices[i].first; + Index right = eval_op_indices[i].second; + + Index size = eval_left_dims[left]; + eigen_assert(size == eval_right_dims[right] && + "Contraction axes must be same size"); + + if (i+1 < static_cast(internal::array_size::value)) { + m_k_strides[i+1] = m_k_strides[i] * size; + } else { + m_k_size = m_k_strides[i] * size; + } + m_left_contracting_strides[i] = lhs_strides[left]; + m_right_contracting_strides[i] = rhs_strides[right]; + + if (i > 0 && right < eval_op_indices[i-1].second) { + m_rhs_inner_dim_reordered = true; + } + if (right != i) { + m_rhs_inner_dim_contiguous = false; + } + } + + // If the layout is RowMajor, we need to reverse the m_dimensions + if (static_cast(Layout) == static_cast(RowMajor)) { + for (int i = 0, j = NumDims - 1; i < j; i++, j--) { + numext::swap(m_dimensions[i], m_dimensions[j]); + } + } + + // A set of parameters that will allow output kernel to get from output + // tensor dimensions (i, j) into the original tensor dimensions. + // TODO(ezhulenev): Add parameters required to infer output tensor index for + // more complex contractions than 2x2 on internal dimension. + m_tensor_contraction_params.swapped_arguments = static_cast(Layout) == RowMajor; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType dest, EvalSubExprsCallback done) { + m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) { + m_rightImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) { + if (dest) { + evalToAsync(dest, [done]() { done(false); }); + } else { + m_result = static_cast( + m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalToAsync(m_result, [done]() { done(true); }); + } + }); + }); + } +#endif // EIGEN_USE_THREADS + +#ifndef TENSOR_CONTRACTION_DISPATCH +#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \ + if (this->m_lhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + METHOD ARGS; \ + } \ + } else { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + METHOD ARGS; \ + } \ + } \ + } else { \ + if (this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + METHOD ARGS; \ + } \ + } else { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHOD ARGS; \ + } else { \ + METHOD ARGS; \ + } \ + } \ + } +#endif + +#ifndef TENSOR_CONTRACTION_ASYNC_DISPATCH +#define TENSOR_CONTRACTION_ASYNC_DISPATCH(METHOD, DONE, ALIGNMENT, ARGS, FN) \ + if (this->m_lhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + (new METHOD ARGS)->FN; \ + } else { \ + (new METHOD ARGS)->FN; \ + } \ + } else { \ + if (this->m_rhs_inner_dim_reordered) { \ + (new METHOD ARGS)->FN; \ + } else { \ + (new METHOD ARGS)->FN; \ + } \ + } \ + } else { \ + if 
(this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + (new METHOD ARGS)->FN; \ + } else { \ + (new METHOD ARGS)->FN; \ + } \ + } else { \ + if (this->m_rhs_inner_dim_reordered) { \ + (new METHOD ARGS)->FN; \ + } else { \ + (new METHOD ARGS)->FN; \ + } \ + } \ + } +#endif + + EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { + static_cast(this)->template evalProduct(buffer); + } + +#ifdef EIGEN_USE_THREADS + template + void evalToAsync(Scalar* buffer, EvalToCallback done) const { + static_cast(this) + ->template evalProductAsync(buffer, + std::move(done)); + } +#endif // EIGEN_USE_THREADS + + template + void evalProductSequential(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + } else { + this->template evalGemm(buffer); + } + } + + template + #if !defined(EIGEN_HIPCC) + EIGEN_DEVICE_FUNC + #endif + void evalGemv(Scalar* buffer) const { + const Index rows = m_i_size; + const Index cols = m_k_size; + + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; + const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; + const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned; + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, + m_left_contracting_strides, m_k_strides); + RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides, + m_right_contracting_strides, m_k_strides); + + const Scalar alpha(1); + const Index resIncr(1); + + // zero out the result buffer (which must be of size at least rows * sizeof(Scalar) + m_device.memset(buffer, 0, rows * sizeof(Scalar)); + + internal::general_matrix_vector_product::run( + rows, cols, lhs, rhs, + buffer, resIncr, alpha); + + typedef internal::blas_data_mapper OutputMapper; + m_output_kernel(OutputMapper(buffer, rows), m_tensor_contraction_params, + static_cast(0), static_cast(0), rows, + static_cast(1)); + } + + template + #if !defined(EIGEN_HIPCC) + EIGEN_DEVICE_FUNC + #endif + void evalGemm(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + this->template evalGemmPartial(buffer, 0, k, 1); + } + + template + EIGEN_DEVICE_FUNC void evalGemmPartialWithoutOutputKernel( + Scalar* buffer, Index k_start, Index k_end, int num_threads) const { + evalGemmPartial(buffer, k_start, k_end, + num_threads); + } + + template + EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar* buffer, Index k_start, Index k_end, int num_threads) const { + eigen_assert(k_end >= k_start && k_start >= 0 && k_end <= this->m_k_size); + // columns in slice on left side, rows on right side + const Index k_slice = k_end - k_start; + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // define data mappers for Lhs and Rhs + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = 
internal::unpacket_traits::size; + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + typedef internal::TensorContractionKernel< + Scalar, LhsScalar, RhsScalar, Index, OutputMapper, LhsMapper, RhsMapper> + TensorContractionKernel; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + // Sizes of the blocks to load in cache. See the Goto paper for details. + internal::TensorContractionBlocking + blocking(k_slice, m, n, num_threads); + const Index kc = blocking.kc(); + const Index mc = numext::mini(m, blocking.mc()); + const Index nc = numext::mini(n, blocking.nc()); + + typedef typename TensorContractionKernel::LhsBlock LhsBlock; + typedef typename TensorContractionKernel::RhsBlock RhsBlock; + + LhsBlock blockA; + RhsBlock blockB; + + TensorContractionKernel kernel(m, k_slice, n, mc, kc, nc); + + typedef typename TensorContractionKernel::BlockMemHandle BlockMemHandle; + const BlockMemHandle packed_mem = + kernel.allocate(this->m_device, &blockA, &blockB); + + // If a contraction kernel does not support beta, explicitly initialize + // output buffer with zeroes. + if (!TensorContractionKernel::HasBeta) { + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + } + + for(Index i2=0; i2= k_end) { + m_output_kernel(output_mapper, m_tensor_contraction_params, i2, j2, + actual_mc, actual_nc); + } + } + } + } + + kernel.deallocate(this->m_device, packed_mem); + } + + EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_result; } + +protected: + Dimensions m_dimensions; + + contract_t m_k_strides; + contract_t m_left_contracting_strides; + contract_t m_right_contracting_strides; + + bool m_lhs_inner_dim_contiguous; + bool m_rhs_inner_dim_contiguous; + bool m_rhs_inner_dim_reordered; + + left_nocontract_t m_i_strides; + right_nocontract_t m_j_strides; + left_nocontract_t m_left_nocontract_strides; + right_nocontract_t m_right_nocontract_strides; + + Index m_i_size; + Index m_j_size; + Index m_k_size; + + TensorContractionParams m_tensor_contraction_params; + + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Device EIGEN_DEVICE_REF m_device; + OutputKernelType m_output_kernel; + EvaluatorPointerType m_result; +}; + + +// evaluator for default device +template +struct TensorEvaluator, Device> : + public TensorContractionEvaluatorBase< + TensorEvaluator, Device> > { + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + 
typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + // Could we use NumDimensions here? + typedef DSizes Dimensions; + + TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) { } + + template + void evalProduct(Scalar* buffer) const { + TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, Alignment, (buffer)); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h new file mode 100644 index 0000000..974feb0 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H + + +namespace Eigen { +namespace internal { + +enum { + ShardByRow = 0, + ShardByCol = 1 +}; + + +// Default Blocking Strategy +template +class TensorContractionBlocking { + public: + + /* + adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h` + requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h` + which in turn, requires adding EIGEN_DEVICE_FUNC to `evaluateProductBlockingSizesHeuristic` in `GeneralBlockPanelKernel.h` + which in turn, requires adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` + (else HIPCC will error out) + + However adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` + results in NVCC erroring out with the following error + + ../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901: + dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function + */ + + #if !defined(EIGEN_HIPCC) + EIGEN_DEVICE_FUNC + #endif + TensorContractionBlocking(StorageIndex k, StorageIndex m, StorageIndex n, StorageIndex num_threads = 1) : + kc_(k), mc_(m), nc_(n) + { + if (ShardingType == ShardByCol) { + computeProductBlockingSizes(kc_, mc_, nc_, num_threads); + } + else { + computeProductBlockingSizes(kc_, nc_, mc_, num_threads); + } + + const int rhs_packet_size = internal::packet_traits::size; + kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ? + kc_ : (kc_ / rhs_packet_size) * rhs_packet_size; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex mc() const { return mc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex nc() const { return nc_; } + + private: + StorageIndex kc_; + StorageIndex mc_; + StorageIndex nc_; +}; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h new file mode 100644 index 0000000..3f315fe --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -0,0 +1,6 @@ + +#if defined(__clang__) || defined(__GNUC__) +#warning "Deprecated header file, please either include the main Eigen/CXX11/Tensor header or the respective TensorContractionGpu.h file" +#endif + +#include "TensorContractionGpu.h" diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h new file mode 100644 index 0000000..c818038 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h @@ -0,0 +1,1413 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2015 Benoit Steiner +// Copyright (C) 2015 Navdeep Jaitly +// Copyright (C) 2014 Eric Martin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_GPU_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_GPU_H + +#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) + +namespace Eigen { + +template +__device__ EIGEN_STRONG_INLINE void +EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem, + const Index m_size, const Index n_size, const Index k_size) { + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + // declare and initialize 64 registers for output 8x8 block + + // prefetch registers + Scalar lhs_pf0; + Scalar lhs_pf1; + Scalar lhs_pf2; + Scalar lhs_pf3; + Scalar lhs_pf4; + Scalar lhs_pf5; + Scalar lhs_pf6; + Scalar lhs_pf7; + + Scalar rhs_pf0; + Scalar rhs_pf1; + Scalar rhs_pf2; + Scalar rhs_pf3; + Scalar rhs_pf4; + Scalar rhs_pf5; + Scalar rhs_pf6; + Scalar rhs_pf7; + + // shared memory is formatted + // (contract idx in block, nocontract idx in block, block idx) + // where block idx is column major. This transposition limits the number of + // bank conflicts when reading the LHS. The core idea is that since the contracting + // index is shared by both sides, then the contracting index should be in threadIdx.x. + + // On the LHS, we pad each row inside of each block with an extra element. This makes + // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts + // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks. + + // On the RHS we just add 8 padding elements to the end of each block. This gives no bank + // conflicts on writes and also none on reads. + + // storage indices + const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; + const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; + + const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0; + const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1; + const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2; + const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3; + const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4; + const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5; + const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6; + const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7; + + const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0; + const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1; + const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2; + const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3; + const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4; + const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5; + const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6; + const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7; + + // in the loading code, the following variables are important: + // threadIdx.x: the vertical position in an 8x8 block + // threadIdx.y: the vertical index of the 8x8 block in the grid + // threadIdx.z: the horizontal position in an 8x8 block + // k: the horizontal index of the 8x8 block in the grid + // + // The k parameter is implicit (it was the loop counter for a loop that went + // from 0 to <8, but now that loop is unrolled in the below code. 
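+  // Index arithmetic sketch (thread coordinates hypothetical): for
+  // (threadIdx.x, threadIdx.y, threadIdx.z) = (3, 2, 5),
+  //   lhs_store_idx_base = 2*72 + 3*9 + 5 = 176 and
+  //   rhs_store_idx_base = 2*72 + 5*8 + 3 = 187.
+  // The LHS row pitch of 9 (8 data elements + 1 pad) breaks the power-of-two
+  // stride that would otherwise map several of the eight threadIdx.x values
+  // onto the same shared-memory bank.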
+ + const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; + const Index lhs_vert = base_m + load_idx_vert; + +#define prefetchIntoRegisters(base_k) \ + { \ + lhs_pf0 = conv(0); \ + lhs_pf1 = conv(0); \ + lhs_pf2 = conv(0); \ + lhs_pf3 = conv(0); \ + lhs_pf4 = conv(0); \ + lhs_pf5 = conv(0); \ + lhs_pf6 = conv(0); \ + lhs_pf7 = conv(0); \ + \ + rhs_pf0 = conv(0); \ + rhs_pf1 = conv(0); \ + rhs_pf2 = conv(0); \ + rhs_pf3 = conv(0); \ + rhs_pf4 = conv(0); \ + rhs_pf5 = conv(0); \ + rhs_pf6 = conv(0); \ + rhs_pf7 = conv(0); \ + \ + if (!needs_edge_check || lhs_vert < m_size) { \ + const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ + const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ + const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ + const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ + const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ + const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ + const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ + const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \ + \ + if (!needs_edge_check || lhs_horiz_7 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \ + } else if (lhs_horiz_6 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + } else if (lhs_horiz_5 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + } else if (lhs_horiz_4 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + } else if (lhs_horiz_3 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + } else if (lhs_horiz_2 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + } else if (lhs_horiz_1 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + } else if (lhs_horiz_0 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + } \ + } \ + \ + const Index rhs_vert = base_k + load_idx_vert; \ + if (!needs_edge_check || rhs_vert < k_size) { \ + const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ + const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ + const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ + const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ + const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ + const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ + const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ + const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ + \ + if (rhs_horiz_7 < n_size) { \ + rhs_pf0 = 
rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \ + } else if (rhs_horiz_6 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + } else if (rhs_horiz_5 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + } else if (rhs_horiz_4 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + } else if (rhs_horiz_3 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + } else if (rhs_horiz_2 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + } else if (rhs_horiz_1 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + } else if (rhs_horiz_0 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + } \ + } \ + } \ + +#define writeRegToShmem(_) \ + lhs_shmem[lhs_store_idx_0] = lhs_pf0; \ + rhs_shmem[rhs_store_idx_0] = rhs_pf0; \ + \ + lhs_shmem[lhs_store_idx_1] = lhs_pf1; \ + rhs_shmem[rhs_store_idx_1] = rhs_pf1; \ + \ + lhs_shmem[lhs_store_idx_2] = lhs_pf2; \ + rhs_shmem[rhs_store_idx_2] = rhs_pf2; \ + \ + lhs_shmem[lhs_store_idx_3] = lhs_pf3; \ + rhs_shmem[rhs_store_idx_3] = rhs_pf3; \ + \ + lhs_shmem[lhs_store_idx_4] = lhs_pf4; \ + rhs_shmem[rhs_store_idx_4] = rhs_pf4; \ + \ + lhs_shmem[lhs_store_idx_5] = lhs_pf5; \ + rhs_shmem[rhs_store_idx_5] = rhs_pf5; \ + \ + lhs_shmem[lhs_store_idx_6] = lhs_pf6; \ + rhs_shmem[rhs_store_idx_6] = rhs_pf6; \ + \ + lhs_shmem[lhs_store_idx_7] = lhs_pf7; \ + rhs_shmem[rhs_store_idx_7] = rhs_pf7; \ + + // declare and initialize result array +#define res(i, j) _res_##i##j +#define initResultRow(i) \ + Scalar res(i, 0) = conv(0); \ + Scalar res(i, 1) = conv(0); \ + Scalar res(i, 2) = conv(0); \ + Scalar res(i, 3) = conv(0); \ + Scalar res(i, 4) = conv(0); \ + Scalar res(i, 5) = conv(0); \ + Scalar res(i, 6) = conv(0); \ + Scalar res(i, 7) = conv(0); \ + + internal::scalar_cast_op conv; + initResultRow(0); + initResultRow(1); + initResultRow(2); + initResultRow(3); + initResultRow(4); + initResultRow(5); + initResultRow(6); + initResultRow(7); +#undef initResultRow + + for (Index base_k = 0; base_k < k_size; base_k += 64) { + // wait for previous iteration to finish with shmem. 
Despite common sense, + // the code is a bit faster with this here then at bottom of loop + __syncthreads(); + + prefetchIntoRegisters(base_k); + writeRegToShmem(); + + #undef prefetchIntoRegisters + #undef writeRegToShmem + + // wait for shared mem packing to be done before starting computation + __syncthreads(); + + // compute 8x8 matrix product by outer product. This involves packing one column + // of LHS and one row of RHS into registers (takes 16 registers). + +#define lcol(i) _lcol##i + Scalar lcol(0); + Scalar lcol(1); + Scalar lcol(2); + Scalar lcol(3); + Scalar lcol(4); + Scalar lcol(5); + Scalar lcol(6); + Scalar lcol(7); + +#define rrow(j) _rrow##j + Scalar rrow(0); + Scalar rrow(1); + Scalar rrow(2); + Scalar rrow(3); + Scalar rrow(4); + Scalar rrow(5); + Scalar rrow(6); + Scalar rrow(7); + + // Now x corresponds to k, y to m, and z to n + const Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y]; + const Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z]; + +#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))] +#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))] + +#define loadData(i, j) \ + lcol(0) = lhs_element(0, j); \ + rrow(0) = rhs_element(i, 0); \ + lcol(1) = lhs_element(1, j); \ + rrow(1) = rhs_element(i, 1); \ + lcol(2) = lhs_element(2, j); \ + rrow(2) = rhs_element(i, 2); \ + lcol(3) = lhs_element(3, j); \ + rrow(3) = rhs_element(i, 3); \ + lcol(4) = lhs_element(4, j); \ + rrow(4) = rhs_element(i, 4); \ + lcol(5) = lhs_element(5, j); \ + rrow(5) = rhs_element(i, 5); \ + lcol(6) = lhs_element(6, j); \ + rrow(6) = rhs_element(i, 6); \ + lcol(7) = lhs_element(7, j); \ + rrow(7) = rhs_element(i, 7); \ + +#define computeCol(j) \ + res(0, j) += lcol(0) * rrow(j); \ + res(1, j) += lcol(1) * rrow(j); \ + res(2, j) += lcol(2) * rrow(j); \ + res(3, j) += lcol(3) * rrow(j); \ + res(4, j) += lcol(4) * rrow(j); \ + res(5, j) += lcol(5) * rrow(j); \ + res(6, j) += lcol(6) * rrow(j); \ + res(7, j) += lcol(7) * rrow(j); \ + +#define computePass(i) \ + loadData(i, i); \ + \ + computeCol(0); \ + computeCol(1); \ + computeCol(2); \ + computeCol(3); \ + computeCol(4); \ + computeCol(5); \ + computeCol(6); \ + computeCol(7); \ + + computePass(0); + computePass(1); + computePass(2); + computePass(3); + computePass(4); + computePass(5); + computePass(6); + computePass(7); + +#undef lcol +#undef rrow +#undef lhs_element +#undef rhs_element +#undef loadData +#undef computeCol +#undef computePass + } // end loop over k + + // we've now iterated over all of the large (ie width 64) k blocks and + // accumulated results in registers. At this point thread (x, y, z) contains + // the sum across all big k blocks of the product of little k block of index (x, y) + // with block of index (y, z). To compute the final output, we need to reduce + // the 8 threads over y by summation. 
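+  // The reduction below is a butterfly over the low three lane-id bits; in
+  // simplified scalar form (a sketch of what the macros expand to) each
+  // accumulator v becomes
+  //   v += __shfl_xor_sync(0xFFFFFFFF, v, 1);
+  //   v += __shfl_xor_sync(0xFFFFFFFF, v, 2);
+  //   v += __shfl_xor_sync(0xFFFFFFFF, v, 4);
+  // after which every lane within each group of 8 holds the full sum, so any
+  // one of them can write the result.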
+#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000) +#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) +#else +#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor_sync(0xFFFFFFFF, res(i, j), mask) +#endif + +#define reduceRow(i, mask) \ + shuffleInc(i, 0, mask); \ + shuffleInc(i, 1, mask); \ + shuffleInc(i, 2, mask); \ + shuffleInc(i, 3, mask); \ + shuffleInc(i, 4, mask); \ + shuffleInc(i, 5, mask); \ + shuffleInc(i, 6, mask); \ + shuffleInc(i, 7, mask); \ + +#define reduceMatrix(mask) \ + reduceRow(0, mask); \ + reduceRow(1, mask); \ + reduceRow(2, mask); \ + reduceRow(3, mask); \ + reduceRow(4, mask); \ + reduceRow(5, mask); \ + reduceRow(6, mask); \ + reduceRow(7, mask); \ + + // actually perform the reduction, now each thread of index (_, y, z) + // contains the correct values in its registers that belong in the output + // block + reduceMatrix(1); + reduceMatrix(2); + reduceMatrix(4); + +#undef shuffleInc +#undef reduceRow +#undef reduceMatrix + + // now we need to copy the 64 values into main memory. We can't split work + // among threads because all variables are in registers. There's 2 ways + // to do this: + // (1) have 1 thread do 64 writes from registers into global memory + // (2) have 1 thread do 64 writes into shared memory, and then 8 threads + // each do 8 writes into global memory. We can just overwrite the shared + // memory from the problem we just solved. + // (2) is slightly faster than (1) due to less branching and more ILP + + // TODO: won't yield much gain, but could just use currently unused shared mem + // and then we won't have to sync + // wait for shared mem to be out of use + __syncthreads(); + +#define writeResultShmem(i, j) \ + lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \ + +#define writeRow(i) \ + writeResultShmem(i, 0); \ + writeResultShmem(i, 1); \ + writeResultShmem(i, 2); \ + writeResultShmem(i, 3); \ + writeResultShmem(i, 4); \ + writeResultShmem(i, 5); \ + writeResultShmem(i, 6); \ + writeResultShmem(i, 7); \ + + if (threadIdx.x == 0) { + writeRow(0); + writeRow(1); + writeRow(2); + writeRow(3); + writeRow(4); + writeRow(5); + writeRow(6); + writeRow(7); + } +#undef writeResultShmem +#undef writeRow + + const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); + const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); + + if (threadIdx.x < max_i_write) { + if (max_j_write == 8) { + // TODO: can i trade bank conflicts for coalesced writes? 
+ Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0]; + Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1]; + Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2]; + Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3]; + Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4]; + Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5]; + Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6]; + Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7]; + + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; + } else { +#pragma unroll 7 + for (int j = 0; j < max_j_write; j++) { + Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j]; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; + } + } + } +#undef res +} + + +template +__global__ void +#if defined(EIGEN_HIPCC) +__launch_bounds__(512, 1) +#else +__launch_bounds__(512) +#endif +EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ Scalar lhs_shmem[72 * 64]; + __shared__ Scalar rhs_shmem[72 * 64]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size && base_n + 63 < n_size) { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } else { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } +} + + +template +__device__ __forceinline__ void +EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][16], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + + // prefetch registers + float4 lhs_pf0, rhs_pf0; + + float4 results[4]; + for (int i=0; i < 4; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + +#define prefetch_lhs(reg, row, col) \ + if (!CHECK_LHS_BOUNDARY) { \ + if (col < k_size) { \ + reg =lhs.template loadPacket(row, col); \ + } \ + } else { \ + if (col < k_size) { \ + if (row + 3 < m_size) { \ + reg =lhs.template loadPacket(row, col); \ + } else if (row + 2 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + reg.z =lhs(row + 2, col); \ + } else if (row + 1 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + } else if (row < m_size) { \ + reg.x =lhs(row + 0, col); \ + } \ + } \ + } \ + + Index lhs_vert = base_m+threadIdx.x*4; + + 
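The prefetch_lhs macro just defined is a guarded vectorized load: when all four rows of the requested column are in range, one float4 packet is fetched; near the boundary the registers are filled element by element, on top of the zero initialisation, so no out-of-range access happens. A reduced, pointer-based sketch of the same pattern follows, with a hypothetical helper name, assuming a column-major float buffer with leading dimension ld and 16-byte aligned columns; the additional guard on the k (column) index used by the macro is omitted here.

    // Hypothetical helper, not Eigen API: load rows [row, row+3] of column `col`
    // from a column-major buffer, guarding the tail against m_size.
    __device__ inline float4 guarded_load4(const float* data, int m_size, int ld,
                                            int row, int col) {
      float4 r = make_float4(0.f, 0.f, 0.f, 0.f);  // out-of-range lanes stay zero
      const float* p = data + row + col * ld;
      if (row + 3 < m_size) {
        r = *reinterpret_cast<const float4*>(p);   // interior: one vector load
      } else {
        if (row + 0 < m_size) r.x = p[0];          // boundary: partial, element-wise
        if (row + 1 < m_size) r.y = p[1];
        if (row + 2 < m_size) r.z = p[2];
      }
      return r;
    }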
for (Index k = 0; k < k_size; k += 16) { + + lhs_pf0 = internal::pset1(0); + rhs_pf0 = internal::pset1(0); + + Index lhs_horiz = threadIdx.y+k; + prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz) + + Index rhs_vert = k+(threadIdx.x%4)*4; + Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; + + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.template loadPacket(rhs_vert, rhs_horiz0); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } else { + if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + rhs_pf0 = rhs.template loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + float x1, x2 ; + // the following can be a bitwise operation..... some day. + if((threadIdx.x%8) < 4) { + x1 = rhs_pf0.y; + x2 = rhs_pf0.w; + } else { + x1 = rhs_pf0.x; + x2 = rhs_pf0.z; + } + #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000) + x1 = __shfl_xor(x1, 4); + x2 = __shfl_xor(x2, 4); + #else + x1 = __shfl_xor_sync(0xFFFFFFFF, x1, 4); + x2 = __shfl_xor_sync(0xFFFFFFFF, x2, 4); + #endif + if((threadIdx.x%8) < 4) { + rhs_pf0.y = x1; + rhs_pf0.w = x2; + } else { + rhs_pf0.x = x1; + rhs_pf0.z = x2; + } + + // We have 64 features. + // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1. + // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3. + // ... + // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63 + // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1 + // ... + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y); + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w); + + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // ... + // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) + // ... 
+ + lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w); + + +#define add_vals(fl1, fl2, fr1, fr2)\ + results[0].x += fl1.x * fr1.x;\ + results[0].y += fl1.y * fr1.x;\ + results[0].z += fl2.x * fr1.x;\ + results[0].w += fl2.y * fr1.x;\ +\ + results[1].x += fl1.x * fr1.y;\ + results[1].y += fl1.y * fr1.y;\ + results[1].z += fl2.x * fr1.y;\ + results[1].w += fl2.y * fr1.y;\ +\ + results[2].x += fl1.x * fr2.x;\ + results[2].y += fl1.y * fr2.x;\ + results[2].z += fl2.x * fr2.x;\ + results[2].w += fl2.y * fr2.x;\ +\ + results[3].x += fl1.x * fr2.y;\ + results[3].y += fl1.y * fr2.y;\ + results[3].z += fl2.x * fr2.y;\ + results[3].w += fl2.y * fr2.y;\ + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 16; koff ++) { + // 32 x threads. + float2 fl1 = lhs_shmem2[koff][threadIdx.x]; + float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x]; + + int start_feature = threadIdx.y * 4; + float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + + add_vals(fl1, fl2, fr1, fr2) + } + __syncthreads(); + } + +#undef prefetch_lhs +#undef add_vals + + Index horiz_base = threadIdx.y*4+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + // CHECK LHS + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert + 2 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK RHS + /* + int ncols_rem = fminf(n_size- horiz_base, 4); + for (int i = 0; i < ncols_rem; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + }*/ + for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. 
+ for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__device__ __forceinline__ void +EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][32], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + + // prefetch registers + float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3; + float4 rhs_pf0, rhs_pf1; + + float4 results[8]; + for (int i=0; i < 8; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + + Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32; + for (Index k = 0; k < k_size; k += 32) { + lhs_pf0 = internal::pset1(0); + lhs_pf1 = internal::pset1(0); + lhs_pf2 = internal::pset1(0); + lhs_pf3 = internal::pset1(0); + + rhs_pf0 = internal::pset1(0); + rhs_pf1 = internal::pset1(0); + + if (!CHECK_LHS_BOUNDARY) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else { + // just CHECK_LHS_BOUNDARY + if (lhs_vert + 3 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 =lhs.template loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 2 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, 
(threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 1 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + } + } else if (lhs_vert < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + } + } + } + __syncthreads(); + Index rhs_vert = k+threadIdx.x*4; + Index rhs_horiz0 = threadIdx.y*2+base_n; + Index rhs_horiz1 = threadIdx.y*2+1+base_n; + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.template loadPacket(rhs_vert, 
rhs_horiz0); + rhs_pf1 = rhs.template loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else { + if (rhs_horiz1 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.template loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.template loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (k+threadIdx.x*4 + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (k+threadIdx.x*4 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.template loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + __syncthreads(); + // Loaded. Do computation + // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1. + // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3. + // .. + // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63 + rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x); + // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1. + // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3. + // .. + rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y); + // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1. + // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3. + rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z); + // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1. + // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3. + rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w); + + // LHS. + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // ... + // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) + // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. 
(126, 127) + + +#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\ + results[0].x += a_feat1.x * f1.x;\ + results[1].x += a_feat1.x * f1.y;\ + results[2].x += a_feat1.x * f2.x;\ + results[3].x += a_feat1.x * f2.y;\ + results[4].x += a_feat1.x * f3.x;\ + results[5].x += a_feat1.x * f3.y;\ + results[6].x += a_feat1.x * f4.x;\ + results[7].x += a_feat1.x * f4.y;\ +\ + results[0].y += a_feat1.y * f1.x;\ + results[1].y += a_feat1.y * f1.y;\ + results[2].y += a_feat1.y * f2.x;\ + results[3].y += a_feat1.y * f2.y;\ + results[4].y += a_feat1.y * f3.x;\ + results[5].y += a_feat1.y * f3.y;\ + results[6].y += a_feat1.y * f4.x;\ + results[7].y += a_feat1.y * f4.y;\ +\ + results[0].z += a_feat2.x * f1.x;\ + results[1].z += a_feat2.x * f1.y;\ + results[2].z += a_feat2.x * f2.x;\ + results[3].z += a_feat2.x * f2.y;\ + results[4].z += a_feat2.x * f3.x;\ + results[5].z += a_feat2.x * f3.y;\ + results[6].z += a_feat2.x * f4.x;\ + results[7].z += a_feat2.x * f4.y;\ +\ + results[0].w += a_feat2.y * f1.x;\ + results[1].w += a_feat2.y * f1.y;\ + results[2].w += a_feat2.y * f2.x;\ + results[3].w += a_feat2.y * f2.y;\ + results[4].w += a_feat2.y * f3.x;\ + results[5].w += a_feat2.y * f3.y;\ + results[6].w += a_feat2.y * f4.x;\ + results[7].w += a_feat2.y * f4.y;\ + + lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y); + lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y); + lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y); + + lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w); + lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w); + lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w); + lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w); + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 32; koff ++) { + float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8]; + float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8]; + + // first feature is at (threadIdx.y/4) * 8 last is at start + 8. 
+ int start_feature = (threadIdx.y / 4) * 8; + + float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4]; + float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4]; + float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4]; + float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4]; + + add_vals(a3, a4, br1, br2, br3, br4) + } + __syncthreads(); + } // end loop over k + + __syncthreads(); + Index horiz_base = (threadIdx.y/4)*8+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert + 2 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK BOUNDARY_B + for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. 
+ for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__global__ void +#if defined(EIGEN_HIPCC) +__launch_bounds__(256, 1) +#else +__launch_bounds__(256) +#endif +EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[64*32]; + __shared__ float2 rhs_shmem[128*8]; + + typedef float2 LHS_MEM[64][32]; + typedef float2 RHS_MEM[128][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 128 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + bool check_rhs = (base_n + 63) >= n_size; + bool check_lhs128 = (base_m + 127) >= m_size; + + if (!check_rhs) { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } else { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } +} + +template +__global__ void +#if defined(EIGEN_HIPCC) +__launch_bounds__(256, 1) +#else +__launch_bounds__(256) +#endif +EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[32][16]; + __shared__ float2 rhs_shmem[64][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size) { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } else { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } +} + + +template +struct TensorEvaluator, GpuDevice> : + public TensorContractionEvaluatorBase, GpuDevice> > { + + typedef GpuDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = 
TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + typedef typename LeftEvaluator::Dimensions LeftDimensions; + typedef typename RightEvaluator::Dimensions RightDimensions; + + TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) + { + EIGEN_STATIC_ASSERT( (internal::is_same::value), + GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS); + } + + // We need to redefine this method to make nvcc happy + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + this->m_leftImpl.evalSubExprsIfNeeded(NULL); + this->m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + this->m_result = static_cast(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); + evalTo(this->m_result); + return true; + } + } + + void evalTo(Scalar* buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + } + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 8, 8); + LAUNCH_GPU_KERNEL((EigenContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + }; + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + if (m < 768 || n < 768) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(16, 16, 1); + 
LAUNCH_GPU_KERNEL((EigenFloatContractionKernel16x16), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } else { + const Index m_blocks = (m + 127) / 128; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 32, 1); + LAUNCH_GPU_KERNEL((EigenFloatContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + } + }; + + template + void evalTyped(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + EIGEN_UNUSED_VARIABLE(k) + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + +#if defined(EIGEN_USE_HIP) + setGpuSharedMemConfig(hipSharedMemBankSizeEightByte); +#else + setGpuSharedMemConfig(cudaSharedMemBankSizeEightByte); +#endif + + LaunchKernels::Run(lhs, rhs, output, m, n, k, this->m_device); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_GPU and EIGEN_GPUCC +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_GPU_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h new file mode 100644 index 0000000..9ab900b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -0,0 +1,575 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H + +namespace Eigen { + +namespace internal { + +enum { + Rhs = 0, + Lhs = 1 +}; + +/* + * Implementation of the Eigen blas_data_mapper class for tensors. + */ +/// The make pointer class is used by sycl in order to build the mapper class on the device. For other platform the default make pointer is used which +/// is scalar * for CoeffLoader. 
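The CoeffLoader declared next comes in two flavours: without raw access every read goes through the evaluator's coeff()/packet() interface, while with raw access the loader holds a pointer that can additionally be pre-offset once (the DirectOffsets capability the sub-mapper relies on further down). A reduced sketch of that split, with hypothetical names, a fixed float scalar and a plain pointer standing in for the MakePointer_ abstraction:

    #include <cstddef>

    // Hypothetical reduction of the CoeffLoader idea, not the Eigen class below.
    template <typename Evaluator, bool HasRawAccess>
    struct SimpleLoader {                       // no raw buffer: go through coeff()
      static const bool DirectOffsets = false;
      explicit SimpleLoader(const Evaluator& e) : m_eval(e) {}
      float coeff(std::ptrdiff_t i) const { return m_eval.coeff(i); }
      const Evaluator& m_eval;
    };

    template <typename Evaluator>
    struct SimpleLoader<Evaluator, true> {      // raw buffer: read through a pointer
      static const bool DirectOffsets = true;
      explicit SimpleLoader(const Evaluator& e) : m_data(e.data()) {}
      void offsetBuffer(std::ptrdiff_t off) { m_data += off; }  // pay offset once
      float coeff(std::ptrdiff_t i) const { return m_data[i]; }
      const float* m_data;
    };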
+template class MakePointer_ = MakePointer> +struct CoeffLoader; + +template class MakePointer_ = MakePointer> +class BaseTensorContractionMapper; + +template class MakePointer_> +struct CoeffLoader { + enum { + DirectOffsets = false + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) { + eigen_assert(false && "unsupported"); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename MakePointer_::Type + data() const { + eigen_assert(false && "unsupported"); + return NULL; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return m_tensor.template packet(index); + } + + #ifdef EIGEN_USE_SYCL + // The placeholder accessors require to be bound to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_tensor.bind(cgh); + } + #endif + + private: + const Tensor m_tensor; +}; + +template class MakePointer_> +struct CoeffLoader { + enum { + DirectOffsets = true + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_data += offset; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename MakePointer_::Type + data() const { + return m_data; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return internal::ploadt_ro(m_data + index); + } + + #ifdef EIGEN_USE_SYCL + // The placeholder accessors require to be bound to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } + #endif + private: + typedef typename Tensor::Scalar Scalar; + + typename MakePointer_::Type m_data; +}; + +template class MakePointer_ = MakePointer> +class SimpleTensorContractionMapper { + public: + EIGEN_DEVICE_FUNC + SimpleTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + m_tensor(tensor), + m_nocontract_strides(nocontract_strides), + m_ij_strides(ij_strides), + m_contract_strides(contract_strides), + m_k_strides(k_strides) { } + + enum { + DirectOffsets = CoeffLoader::DirectOffsets + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_tensor.offsetBuffer(offset); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row) const { + // column major assumption + return operator()(row, 0); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { + return m_tensor.coeff(computeIndex(row, col)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { + const bool left = (side == Lhs); + EIGEN_UNUSED_VARIABLE(left); // annoying bug in g++8.1: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85963 + Index nocontract_val = left ? row : col; + Index linidx = 0; + EIGEN_UNROLL_LOOP + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = nocontract_val / m_ij_strides[i]; + linidx += idx * m_nocontract_strides[i]; + nocontract_val -= idx * m_ij_strides[i]; + } + if (array_size::value > array_size::value) { + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx += nocontract_val; + } else { + linidx += nocontract_val * m_nocontract_strides[0]; + } + } + + Index contract_val = left ? col : row; + if(array_size::value > 0) { + EIGEN_UNROLL_LOOP + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = contract_val / m_k_strides[i]; + linidx += idx * m_contract_strides[i]; + contract_val -= idx * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx += contract_val; + } else { + linidx += contract_val * m_contract_strides[0]; + } + } + + return linidx; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE IndexPair computeIndexPair(Index row, Index col, const Index distance) const { + const bool left = (side == Lhs); + EIGEN_UNUSED_VARIABLE(left); // annoying bug in g++8.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85963 + Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; + Index linidx[2] = {0, 0}; + if (array_size::value > array_size::value) { + EIGEN_UNROLL_LOOP + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = nocontract_val[0] / m_ij_strides[i]; + const Index idx1 = nocontract_val[1] / m_ij_strides[i]; + linidx[0] += idx0 * m_nocontract_strides[i]; + linidx[1] += idx1 * m_nocontract_strides[i]; + nocontract_val[0] -= idx0 * m_ij_strides[i]; + nocontract_val[1] -= idx1 * m_ij_strides[i]; + } + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx[0] += nocontract_val[0]; + linidx[1] += nocontract_val[1]; + } else { + linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; + linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; + } + } + + Index contract_val[2] = {left ? col : row, left ? col : row + distance}; + if (array_size::value> 0) { + EIGEN_UNROLL_LOOP + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = contract_val[0] / m_k_strides[i]; + const Index idx1 = contract_val[1] / m_k_strides[i]; + linidx[0] += idx0 * m_contract_strides[i]; + linidx[1] += idx1 * m_contract_strides[i]; + contract_val[0] -= idx0 * m_k_strides[i]; + contract_val[1] -= idx1 * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx[0] += contract_val[0]; + linidx[1] += contract_val[1]; + } else { + linidx[0] += contract_val[0] * m_contract_strides[0]; + linidx[1] += contract_val[1] * m_contract_strides[0]; + } + } + return IndexPair(linidx[0], linidx[1]); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const { + // Only claim alignment when we can compute the actual stride (ie when we're + // dealing with the lhs with inner_dim_contiguous. This is because the + // matrix-vector product relies on the stride when dealing with aligned inputs. + return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { + return ((side == Lhs) && inner_dim_contiguous && array_size::value > 0) ? 
m_contract_strides[0] : 1; + } + + #ifdef EIGEN_USE_SYCL + // The placeholder accessors require to be bound to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_tensor.bind(cgh); + } + #endif + + const CoeffLoader& tensor() const { + return m_tensor; + } + + const nocontract_t& nocontract_strides() const { + return m_nocontract_strides; + } + const nocontract_t& ij_strides() const { return m_ij_strides; } + const contract_t& contract_strides() const { return m_contract_strides; } + const contract_t& k_strides() const { return m_k_strides; } + + protected: + CoeffLoader m_tensor; + const nocontract_t m_nocontract_strides; + const nocontract_t m_ij_strides; + const contract_t m_contract_strides; + const contract_t m_k_strides; +}; + +template class MakePointer_> +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::enable_if::size==packet_size,PacketT>::type + load(Index i, Index j) const + { + // whole method makes column major assumption + + // don't need to add offsets for now (because operator handles that) + // current code assumes packet size must be a multiple of 2 + EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + + if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { + const Index index = this->computeIndex(i, j); + eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); + return this->m_tensor.template packet(index); + } + + const IndexPair indexPair = this->computeIndexPair(i, j, packet_size - 1); + const Index first = indexPair.first; + const Index lastIdx = indexPair.second; + + // We can always do optimized packet reads from left hand side right now, because + // the vertical matrix dimension on the left hand side is never contracting. + // On the right hand side we need to check if the contracting dimensions may have + // been shuffled first. 
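The load() overloads that follow turn this into a two-way decision: if the mapped indices of the whole packet are provably consecutive (and, for the right hand side, the contracting dimensions were not reordered), a single vectorized read is issued from the first index; otherwise the coefficients are gathered, two at a time via computeIndexPair, into an aligned stack buffer that is then packed with pload. A simplified, pointer-based sketch of that decision, with a hypothetical helper and a caller-supplied index mapping standing in for computeIndex:

    #include <Eigen/Core>

    // Simplified sketch, not the mapper below: issue one vector load when the
    // mapped indices are consecutive, otherwise gather into an aligned buffer.
    template <typename Packet, typename IndexFn>
    Packet load_packet_or_gather(const float* data, IndexFn linear_index,
                                 Eigen::Index i, Eigen::Index j) {
      const int P = Eigen::internal::unpacket_traits<Packet>::size;
      const Eigen::Index first = linear_index(i, j);
      const Eigen::Index last = linear_index(i + P - 1, j);
      if (last - first == P - 1) {
        return Eigen::internal::ploadu<Packet>(data + first);  // contiguous fast path
      }
      EIGEN_ALIGN_MAX float buf[Eigen::internal::unpacket_traits<Packet>::size];
      for (int k = 0; k < P; ++k)
        buf[k] = data[linear_index(i + k, j)];                 // scalar gather
      return Eigen::internal::pload<Packet>(buf);
    }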
+ if (Tensor::PacketAccess && + (side == Lhs || internal::array_size::value <= 1 || !inner_dim_reordered) && + (lastIdx - first) == (packet_size - 1)) { + + return this->m_tensor.template packet(first); + } + + EIGEN_ALIGN_MAX Scalar data[packet_size]; + + data[0] = this->m_tensor.coeff(first); + EIGEN_UNROLL_LOOP + for (Index k = 1; k < packet_size - 1; k += 2) { + const IndexPair internal_pair = this->computeIndexPair(i + k, j, 1); + data[k] = this->m_tensor.coeff(internal_pair.first); + data[k + 1] = this->m_tensor.coeff(internal_pair.second); + } + data[packet_size - 1] = this->m_tensor.coeff(lastIdx); + + return pload(data); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::enable_if::size!=packet_size,PacketT>::type + load(Index i, Index j) const + { + const Index requested_packet_size = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar data[requested_packet_size]; + + const IndexPair indexPair = this->computeIndexPair(i, j, requested_packet_size - 1); + const Index first = indexPair.first; + const Index lastIdx = indexPair.second; + + data[0] = this->m_tensor.coeff(first); + for (Index k = 1; k < requested_packet_size - 1; k += 2) { + const IndexPair internal_pair = this->computeIndexPair(i + k, j, 1); + data[k] = this->m_tensor.coeff(internal_pair.first); + data[k + 1] = this->m_tensor.coeff(internal_pair.second); + } + data[requested_packet_size - 1] = this->m_tensor.coeff(lastIdx); + + return pload(data); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PacketT loadPacket(Index i, Index j) const { + return this->load(i,j); + } +}; + + +template class MakePointer_> +class BaseTensorContractionMapper + : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PacketT loadPacket(Index i, Index j) const { + EIGEN_ALIGN_MAX Scalar data[1]; + data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); + return pload(data); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PacketT load(Index i, Index j) const { + EIGEN_ALIGN_MAX Scalar data[1]; + data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); + return pload(data); + } +}; + + +template class MakePointer_=MakePointer> +class TensorContractionSubMapper { + public: + + typedef BaseTensorContractionMapper ParentMapper; + typedef TensorContractionSubMapper Self; + typedef Self LinearMapper; + + enum { + // We can use direct offsets iff the parent mapper supports then and we can compute the strides. + // TODO: we should also enable direct offsets for the Rhs case. + UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size::value > 0) + }; + + EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { + // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute + // this offset every time we attempt to access a coefficient. 
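UseDirectOffsets makes this a compile-time choice: when it holds, the (vert, horiz) block offset is folded into the underlying buffer exactly once in this constructor and every accessor below indexes from (0, 0); when it does not, the offsets stay as members and are re-added on every access. The same contrast in a reduced form, with a hypothetical class and a plain column-major pointer in place of the mapper hierarchy:

    #include <cstddef>

    // Hypothetical reduction of the direct-offsets idea, not the Eigen sub-mapper.
    template <bool BakeOffsets>
    struct OffsetView;

    template <>
    struct OffsetView<true> {   // offset paid once, in the constructor
      OffsetView(const float* base, std::ptrdiff_t ld,
                 std::ptrdiff_t row0, std::ptrdiff_t col0)
          : m_data(base + row0 + col0 * ld), m_ld(ld) {}
      float operator()(std::ptrdiff_t i, std::ptrdiff_t j) const {
        return m_data[i + j * m_ld];                 // no per-access offset math
      }
      const float* m_data;
      std::ptrdiff_t m_ld;
    };

    template <>
    struct OffsetView<false> {  // offsets re-added on every access
      OffsetView(const float* base, std::ptrdiff_t ld,
                 std::ptrdiff_t row0, std::ptrdiff_t col0)
          : m_data(base), m_ld(ld), m_row0(row0), m_col0(col0) {}
      float operator()(std::ptrdiff_t i, std::ptrdiff_t j) const {
        return m_data[(i + m_row0) + (j + m_col0) * m_ld];
      }
      const float* m_data;
      std::ptrdiff_t m_ld, m_row0, m_col0;
    };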
+ if (UseDirectOffsets) { + Index stride = m_base_mapper.stride(); + m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride); + } + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper(i, 0); + } + return m_base_mapper(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper(i, j); + } + return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, j); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper.template load(i, j); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketT& p) const { + if (UseDirectOffsets) { + m_base_mapper.storePacket(i, 0, p); + } + m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + if (UseDirectOffsets) { + return LinearMapper(m_base_mapper, i, j); + } + return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { + EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? 
Aligned : Unaligned; + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { + return false; + } + + #ifdef EIGEN_USE_SYCL + // The placeholder accessors require to be bound to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_base_mapper.bind(cgh); + } + #endif + + const ParentMapper& base_mapper() const { return m_base_mapper; } + Index vert_offset() const { return m_vert_offset; } + Index horiz_offset() const { return m_horiz_offset; } + + private: + ParentMapper m_base_mapper; + const Index m_vert_offset; + const Index m_horiz_offset; +}; + + +template class MakePointer_=MakePointer> +class TensorContractionInputMapper + : public BaseTensorContractionMapper { + + public: + typedef Scalar_ Scalar; + typedef BaseTensorContractionMapper Base; + typedef TensorContractionSubMapper SubMapper; + typedef SubMapper VectorMapper; + + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) + : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const CoeffLoader& get_tensor() const { + return Base::m_tensor; + } +}; + + +template struct TensorContractionInputMapperTrait; + +template class MakePointer_> +struct TensorContractionInputMapperTrait > { + + typedef Tensor_ XprType; + static const bool inner_dim_contiguous = inner_dim_contiguous_; + static const bool inner_dim_reordered = inner_dim_reordered_; + }; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h new file mode 100644 index 0000000..473c228 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h @@ -0,0 +1,1650 @@ +// This file is part of Eigen, a lightweight C++ template library for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla Public License v. 2.0. If a copy of the MPL was not +// distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorContractionSycl.h + * + * \brief: + * TensorContractionSycl.h, provides various tensor contraction kernel for SYCL backend + * + *****************************************************************/ + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H + +namespace Eigen { + +namespace TensorSycl { +namespace internal { + +#ifndef EIGEN_SYCL_DISABLE_GEMV +/*! 
+ * \brief TVPanelSize, a template class used for setting the panel size required for launching General TensorVector + * contraction kernel on various hardware devices. + * + * \tparam Scalar: determines the element type of the tensor/vector + * + * \tparam StorageIndex determines the Index type. + * + * \tparam NCWindow: determines the number of non-contracting element to be process by each work-group + * + * \tparam CFactor: determines the number of contracting element to be process by each thread + * + * \tparam NCFactor: determines the number of non-contracting element to be process by each thread + */ +template +struct TVPanelSize { + // LocalThreadSizeC: determines total number of thread per workgroup for the contracting dimension + static EIGEN_CONSTEXPR StorageIndex LocalThreadSizeC = EIGEN_SYCL_LOCAL_THREAD_DIM0; + // LocalThreadSizeNC: determines total number of thread per workgroup for the non-contracting dimension + static EIGEN_CONSTEXPR StorageIndex LocalThreadSizeNC = EIGEN_SYCL_LOCAL_THREAD_DIM1; + // TileSizeDimNC: determines the tile size for the non-contracting dimension + static EIGEN_CONSTEXPR StorageIndex TileSizeDimNC = NCWindow / NCFactor; + // TileSizeDimC: determines the tile size for the contracting dimension + static EIGEN_CONSTEXPR StorageIndex TileSizeDimC = CFactor * LocalThreadSizeNC * LocalThreadSizeC; + // WorkLoadPerThreadNC : determines workload per thread for loading the non-contracting dimension + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadNC = TileSizeDimNC / LocalThreadSizeNC; + // WorkLoadPerThreadC: determines workload per thread for loading the non-contracting dimension + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadC = TileSizeDimC / LocalThreadSizeC; + // BC : determines if supporting bank conflict is required + static EIGEN_CONSTEXPR bool BC = false; +}; +#endif + +/*! + * \brief TTPanelSize, a template class used for setting the panel size required for launching General Tensor Tensor + contraction kernel on various hardware devices. + * + * \tparam Scalar: determines the element type of the tensor + * + * \tparam StorageIndex: determines the Index type. + * + * \tparam REG_SIZE_M: determines workload per thread for loading the M dimension This can be varied based on the + available register on a chosen device(can be controlled by EIGEN_SYCL_REG_M macro). + * + * \tparam REG_SIZE_N: determines workload per thread for loading the N dimension This can be varied based on the + available register on a chosen device(can be controlled by EIGEN_SYCL_REG_N macro). + * + * \tparam TSDK: determines Tile size for dimension K. The packet size is assumed to be considered + */ + +template +struct TTPanelSize { + // TileSizeDimK: determines Tile size for dimension K. 
The packet size is assumed to be considered + static EIGEN_CONSTEXPR StorageIndex TileSizeDimK = TSDK; + // WorkLoadPerThreadM : determines workload per thread for loading the M dimension This can be varied based on the + // available register on a chosen device(can be controlled by EIGEN_SYCL_REG_M macro// +#ifndef EIGEN_SYCL_REG_M + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadM = REG_SIZE_M; +#else + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadM = EIGEN_SYCL_REG_M; +#endif +// WorkLoadPerThreadN : determines workload per thread for loading the N dimension This can be varied based on the +// available register on a chosen device(can be controlled by EIGEN_SYCL_REG_N macro +#ifndef EIGEN_SYCL_REG_N + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadN = REG_SIZE_N; +#else + static EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadN = EIGEN_SYCL_REG_N; +#endif + // LocalThreadSizeM: determines total number of thread per workgroup for the m dimension + static EIGEN_CONSTEXPR StorageIndex LocalThreadSizeM = EIGEN_SYCL_LOCAL_THREAD_DIM0; + // LocalThreadSizeN: determines total number of thread per workgroup for the n dimension + static EIGEN_CONSTEXPR StorageIndex LocalThreadSizeN = EIGEN_SYCL_LOCAL_THREAD_DIM1; + // TileSizeDimM: determines the tile size for the m dimension + static EIGEN_CONSTEXPR StorageIndex TileSizeDimM = LocalThreadSizeM * WorkLoadPerThreadM; + // TileSizeDimN: determines the tile size for the n dimension + static EIGEN_CONSTEXPR StorageIndex TileSizeDimN = LocalThreadSizeN * WorkLoadPerThreadN; + // LoadPerThreadLhs: determines workload per thread for loading Lhs Tensor. This must be divisable by packetsize + static EIGEN_CONSTEXPR StorageIndex LoadPerThreadLhs = + ((TileSizeDimK * WorkLoadPerThreadM * WorkLoadPerThreadN) / (TileSizeDimN)); + // LoadPerThreadRhs: determines workload per thread for loading Rhs Tensor. This must be divisable by packetsize + static EIGEN_CONSTEXPR StorageIndex LoadPerThreadRhs = + ((TileSizeDimK * WorkLoadPerThreadM * WorkLoadPerThreadN) / (TileSizeDimM)); + // BC : determines if supporting bank conflict is required + static EIGEN_CONSTEXPR bool BC = true; + // DoubleBuffer: determines if double buffering technique should be used (This can be disabled by + // EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device doesnot have sufficient local memory) + static EIGEN_CONSTEXPR bool DoubleBuffer = +#ifdef EIGEN_SYCL_DISABLE_DOUBLE_BUFFER + false; +#else + true; +#endif +}; + +/* ! + * \brief contraction_type: an enum class representing the Tensor Contraction implementation algorithm. This is used to + * specialize the contraction algorithm based on device support for dedicated local memory. + */ +enum class contraction_type { local, no_local }; +/* ! + * \brief data_source an enum class determining the location of the data in a memory hierarchy (global, local, private). + */ +enum class data_source { global_mem, local_mem, private_mem }; + +/*! + * \brief read, a template function used for loading the data from global + memory. This function is used to guarantee coalesced and vectorized load whenever possible + * + * \tparam PacketLoad: determines if the each element of this tensor block should be loaded in a packet mode + * + * \param is_coalesced_layout: determines whether or not the Tensor data in a memory can be access coalesced and + vectorized when possible. Coalesced memory access is a key factor in Kernel performance. 
When a tensor is 2d and the + contracting dimension is 1, it is always possible to accessed tensor data coalesced and vectorized. This is the case + when RHS(right hand side) Tensor is transposed or when LHS(left hand side) Tensor is not transposed. + * + * \tparam PacketType: determines the type of packet + * + * \tparam TensorMapper: determines the input tensor mapper type + * + * \tparam StorageIndex: determines the Index type + + * \param tensorMapper: is the input tensor + * + * \param NCIndex: is the non-contracting dim index + * + * \param CIndex is the contracting dim index + * + * \param ld: is the leading dimension of the flattened tensor + */ +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if::type read( + const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &ld) { + const StorageIndex row = (is_coalesced_layout) ? NCIndex : CIndex; + const StorageIndex col = (is_coalesced_layout) ? CIndex : NCIndex; + return tensorMapper.get_tensor().template packet(row + (col * ld)); +} + +/*! + * \brief read, special overload of read function, when the read access is not vectorized + * + * \tparam PacketLoad: determines if the each element of this tensor block should be loaded in a packet mode + * + * \param is_coalesced_layout: determines whether or not the Tensor data in a memory can be access coalesced and + vectorized when possible. Coalesced memory access is a key factor in Kernel performance. When a tensor is 2d and the + contracting dimension is 1, it is always possible to accessed tensor data coalesced and vectorized. This is the case + when RHS(right hand side) Tensor is transposed or when LHS(left hand side) Tensor is not transposed. + * + * \tparam PacketType: determines the type of packet + * + * \tparam TensorMapper: determines the input tensor mapper type + * + * \tparam StorageIndex: determines the Index type + + * \param tensorMapper: is the input tensor + * + * \param NCIndex: is the non-contracting dim index + * + * \param CIndex: is the contracting dim index + */ +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if::type read( + const TensorMapper &tensorMapper, const StorageIndex &NCIndex, const StorageIndex &CIndex, const StorageIndex &) { + const StorageIndex row = (IsRhs) ? CIndex : NCIndex; + const StorageIndex col = (IsRhs) ? NCIndex : CIndex; + return tensorMapper(row, col); +} + +/*! + * \brief write, a template function used for storing the data to local memory. This function is used to guarantee + * coalesced and vectorized store whenever possible. + * + * \tparam StorageIndex: determines the Index type + * + * \param ld is the leading dimension of the local memory. ld is a compile time value for the local memory + * + * \tparam data_source: an enum value representing if the location of the data in a memory hierarchy. + * + * \tparam PacketType: determines the type of packet + * + * \tparam DataScalar: determines the output data type + * + * \param packet_data: the data to be written in the local memory + * + * \param ptr: a pointer to the local memory + * + * \param CIndex is the contracting dim index + */ + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if
::type + write(PacketType &packet_data, DataScalar ptr) { + EIGEN_CONSTEXPR int PacketSize = Eigen::internal::unpacket_traits::size; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; i++) { + *ptr = PacketWrapper::scalarize(i, packet_data); + ptr += ld; + } +} + +/*! + * \brief Overloading the write function for storing the data to global memory, when vectorization enabled This function + * is used to guarantee coalesced and vectorized store whenever possible. + * + * \tparam data_source: an enum value representing if the location of the data in a memory hierarchy. + * + * \tparam PacketType: determines the type of packet + * + * \tparam DataScalar: determines the output data type + * + * \param packet_data: the data to be written in the local memory + * + * \param ptr: a pointer to the local memory + */ + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if< + Eigen::internal::unpacket_traits::size != 1 && dt == data_source::global_mem, void>::type +write(PacketType &packet_data, DataScalar *ptr) { + ::Eigen::internal::pstoreu(ptr, packet_data); +} + +/*! + * \brief Overloading the write function for storing the data to global memory, when vectorization is disabled. + * + * \tparam data_source: an enum value representing if the location of the data in a memory hierarchy. + * + * \tparam PacketType: determines the type of packet + * + * \tparam DataScalar: determines the output data type + * + * \param packet_data: the data to be written in the local memory + * + * \param ptr: a pointer to the local memory + */ +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if< + Eigen::internal::unpacket_traits::size == 1 && dt == data_source::global_mem, void>::type +write(PacketType &packet_data, DataScalar *ptr) { + *ptr = packet_data; +} + +/*! + * \brief check_boundary: is used to check the edge condition for non-internal blocks. + * + * \tparam is_internal: determines if the block is internal + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_boundary(bool) { + return true; +} + +/*! + * \brief check_boundary: specialization of the check_boundary for non-internal blocks. + * + * \param cond: true when the data is in range. Otherwise false + */ +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_boundary(bool cond) { + return cond; +} + +/*! + * \brief BlockProperties is a template class that provides different characteristic of a block of each Tensor processed + * by each workgroup. + * + * \tparam is_transposed: iff true, determines whether or not the block of the Tensor is transposed + * + * \tparam packet_load_: determines if the each element of this tensor block should be loaded in a packet mode + * + * \tparam PacketType: determines the type of packet + * + * \tparam OutType: determines the type of each element for this block of tensor. If packet load is true, it will be + * packetType; Otherwise it will be scalar Type + * + * \param elements_per_access determines the size of each element based on OutType + * + * \param is_coalesced_layout determines whether or not the Tensor data in a memory can be access coalesced and + * vectorized when possible. Coalesced memory access is a key factor in Kernel performance. When a tensor is 2d and the + * contracting dimension is 1, it is always possible to accessed tensor data coalesced and vectorized. This is the case + * when RHS(right hand side) Tensor is transposed or when LHS(left hand side) Tensor is not transposed. 
+ *
+ * \param nc_stride determines the stride of the non-contracting dimension used to access the next adjacent element
+ * within the Tensor Block for each work-group
+ *
+ * \param c_stride determines the stride of the contracting dimension used to access the next adjacent element within
+ * the Tensor Block for each work-group
+ */
+template <bool is_transposed, bool is_rhs_, bool packet_load_, typename PacketType>
+struct BlockProperties {
+  static EIGEN_CONSTEXPR bool packet_load = packet_load_;
+  typedef typename Eigen::internal::unpacket_traits<PacketType>::type OutScalar;
+  static EIGEN_CONSTEXPR bool is_rhs = is_rhs_;
+  typedef typename Eigen::internal::conditional<packet_load, PacketType, OutScalar>::type OutType;
+  static EIGEN_CONSTEXPR int elements_per_access = Eigen::internal::unpacket_traits<OutType>::size;
+  static EIGEN_CONSTEXPR bool is_coalesced_layout = !(is_transposed ^ is_rhs);
+  static EIGEN_CONSTEXPR int nc_stride = (is_coalesced_layout ? elements_per_access : 1);
+  static EIGEN_CONSTEXPR int c_stride = (is_coalesced_layout ? 1 : elements_per_access);
+};
+
+/*!
+ * \brief ThreadProperties is a template class that provides each thread's properties within a work-group. Please see
+ * the sycl-1.2.1 specification (https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf) for the definition of
+ * work-groups and work-items.
+ *
+ * \tparam StorageIndex: determines the StorageIndex Type
+ *
+ * \param linearLocalThreadId: determines the linearized location of a thread within a work-group
+ *
+ * \param kGroupId: determines the logical group id in the k dimension of the flattened tensor. It will be > 1 when
+ * the tall/skinny algorithm is used
+ *
+ * \param mGroupOffset: determines the logical start position of all threads within a work-group for the m dimension of
+ * the flattened tensor.
+ *
+ * \param kGroupOffset: determines the logical start position of all threads within a work-group for the k dimension of
+ * the flattened tensor. It will be > 1 when the tall/skinny algorithm is used.
+ *
+ * \param mLocalOffset: determines the logical start position of each thread within a work-group for the m dimension of
+ * a flattened tensor. The position determines the distance of each thread within the work-group from the others,
+ * independent of their global position.
+ *
+ * \param nLocalOffset: determines the logical start position of each thread within a work-group for the n dimension of
+ * a flattened tensor. The position determines the distance of each thread within the work-group from the others,
+ * independent of their global position.
+ *
+ * \param mGlobalOffset: determines the logical start position of each thread for the m dimension on a
+ * flattened tensor
+ *
+ * \param nGlobalOffset: determines the logical start position of each thread for the n dimension on a
+ * flattened tensor
+ *
+ * \param kSize: determines the number of k elements of the flattened Tensor to be processed by each thread for the
+ * given tensor block. This differs from the K dimension of the flattened Tensor when the Tall/Skinny algorithm is used.
+ *
+ * \param is_internal: determines whether the thread within the work-group computes an internal block of the tensor or
+ * an edge block. When it is internal, there is no need to check the boundaries and all the if statements can be
+ * resolved by the compiler. 
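+ *
+ * Worked example (an editorial illustration, not part of the upstream Eigen documentation; the concrete
+ * numbers are assumed): with Properties::TileSizeDimM = 64, Properties::LocalThreadSizeM = 16 and
+ * PacketSize = 4, the thread with linearLocalThreadId = 5 in the work-group with mGroupId = 2 obtains
+ * mGroupOffset = 2 * 64 = 128, mLocalOffset = 4 * (5 % 16) = 20 and hence
+ * mGlobalOffset = mGroupOffset + mLocalOffset = 148 (see the offset computation in
+ * TensorContractionKernel::operator() below).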
+ */ +template +struct ThreadProperties { + const StorageIndex linearLocalThreadId; + const StorageIndex kGroupId; + const StorageIndex mGroupOffset; + const StorageIndex nGroupOffset; + const StorageIndex kGroupOffset; + const StorageIndex mLocalOffset; + const StorageIndex nLocalOffset; + const StorageIndex mGlobalOffset; + const StorageIndex nGlobalOffset; + StorageIndex kSize; + const bool is_internal; + // this is used to adjust the last block + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ThreadProperties( + const StorageIndex linearLocalThreadId_, const StorageIndex kGroupId_, const StorageIndex mGroupOffset_, + const StorageIndex nGroupOffset_, const StorageIndex kGroupOffset_, const StorageIndex mLocalOffset_, + const StorageIndex nLocalOffset_, const StorageIndex mGlobalOffset_, const StorageIndex nGlobalOffset_, + StorageIndex kSize_, const bool is_internal_) + : linearLocalThreadId(linearLocalThreadId_), + kGroupId(kGroupId_), + mGroupOffset(mGroupOffset_), + nGroupOffset(nGroupOffset_), + kGroupOffset(kGroupOffset_), + mLocalOffset(mLocalOffset_), + nLocalOffset(nLocalOffset_), + mGlobalOffset(mGlobalOffset_), + nGlobalOffset(nGlobalOffset_), + kSize(kSize_), + is_internal(is_internal_) {} +}; + +/*! + * \brief TensorContractionKernel is a template class that provides Tensor -Tensor contraction operation. + * + * \tparam OutScalar: determines the output scalar type + * + * \tparam LhsScalar: determines the left-hand-side scalar type + * + * \tparam RhsScalar: determines the right-hand-side scalar type + * + * \tparam OutAccessor: determines the sycl accessor type for out put (please see the sycl-1.2.1 specification + (https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf) for accessor definition) + * + * \tparam LhsMapper determines the tensor contraction mapper type for left-hand-side matrix + * + * \tparam RhsMapper determines the tensor contraction mapper type for right-hand-side matrix + * + * \tparam StorageIndex: determines the StorageIndex Type + * + * \tparam Properties: determines the Contraction Panel properties + * + * \tparam TripleDim: determines the M, K, N dimensions for the flatten tensors in order to treat them as a matrix + * + * \tparam Vectorizable: determines whether or not the vectorization is enabled for the Eigen expression. + * + * \tparam input_mapper_properties : determine if the input tensors are matrix. If they are matrix, special memory + access is used to guarantee that always the memory access are coalesced. + * + * \tptaram IsFinal : determine if this is the final kernel. If so, the result will be written in a final output. + Otherwise, the result of contraction will be written iin a temporary buffer. This is the case when Tall/Skinny + contraction is used. So in this case, a final reduction step is required to compute final output. 
+ + * \tparam contraction_tp: it is an enum value representing whether the local memroy/no local memory implementation of + the algorithm to be used + * + * \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group + * + * \param lhs: determines the left-hand-side flattened tensor (tensor mapper) + * + * \param rhs: determines the right-hand-side flattened tensor (tensor mapper) + * + * \param out_res: determines the output tensor containing the contraction result + * + * \param groupSizeM: a logical number determining the number of work-group for m dimension + * + * \param groupSizeN: a logical number determining the number of work-group for n dimension + * + * \param numTiles: determines total number of tiles on the k dimension + * + * \param TripleDim: determines the M, K, N dimensions for the flatten tensors in order to treat them as a matrix + */ +template +class TensorContractionKernel { + public: + typedef typename Eigen::TensorSycl::internal::Vectorise::PacketReturnType + PacketReturnType; + static EIGEN_CONSTEXPR int PacketSize = + Eigen::TensorSycl::internal::Vectorise::PacketSize; + static EIGEN_CONSTEXPR bool is_lhs_transposed = + !::Eigen::internal::TensorContractionInputMapperTrait::inner_dim_contiguous; + static EIGEN_CONSTEXPR bool is_rhs_transposed = + !::Eigen::internal::TensorContractionInputMapperTrait::inner_dim_contiguous; + + typedef BlockProperties + LHSBlockProperties; + + typedef BlockProperties + RHSBlockProperties; + + static EIGEN_CONSTEXPR StorageIndex NStride = + contraction_tp == contraction_type::local ? Properties::WorkLoadPerThreadN : RHSBlockProperties::nc_stride; + + typedef cl::sycl::accessor Scratch; + typedef cl::sycl::multi_ptr local_ptr; + typedef OutScalar * /*cl::sycl::multi_ptr*/ private_ptr; + typedef + typename ::Eigen::internal::conditional::type + tile_ptr; + static EIGEN_CONSTEXPR StorageIndex LSDL = contraction_tp == contraction_type::local + ? Properties::TileSizeDimM + Properties::BC + : Properties::WorkLoadPerThreadM; + static EIGEN_CONSTEXPR StorageIndex LSDR = contraction_tp == contraction_type::local + ? Properties::TileSizeDimN + Properties::BC + : Properties::WorkLoadPerThreadN; + static EIGEN_CONSTEXPR StorageIndex LocalOffset = Properties::LocalThreadSizeM * Properties::LocalThreadSizeN; + + /** + * \brief MemHolder this is a place holder struct for creating memory hierarchy in SYCL. Inside SYCL kernel it is not + * allowed to have dynamic memory allocation. While the local memory is created outside of the kernel and passed to + * the kernel as an accessor, the private memory can only allowed to be allocated statically. Since we are abstracting + * the TiledMemory for both local and private memory, the MemHolder structs is used as a helper to abstract out + * different type of memory needed when local/no_local memory computation is called. + * + * \tparam contraction_type: it is an enum value representing whether the local memroy/no local memory implementation + of the algorithm to be used + * \tparam the private memory size + * \param ptr the tile memory pointer type + */ + template + struct MemHolder { + tile_ptr ptr; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MemHolder(local_ptr block_start_ptr) : ptr(block_start_ptr) {} + }; + /** + * \brief specialization of memHolder class when no local memory kernel is used. 
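+ *
+ * Editorial note (not part of the upstream Eigen documentation): dynamic allocation is not allowed inside a
+ * SYCL kernel, so this specialization reserves a fixed-size private (register) array of MemSize OutScalar
+ * elements, whereas the primary template above only stores a pointer into the work-group's shared scratch
+ * accessor.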
+ */ + template + struct MemHolder { + OutScalar ptr[MemSize] = {OutScalar{0}}; + }; + /** + * \brief TiledMemory: contains required memory pointer for loading each tile of the TensorContraction panel from + * global memory to local/private memory when local/no_local algorithm used. + * + * \param lhs_scratch_extract : determines the LHS tile memory. It is either private or local memory based on the + * selected contraction_type. + * + * \param rhs_scratch_extract : determines the RHS tile memory. It is either private or local memory based on the + * selected contraction_type. + * + * \param lhs_extract_index: determins the position of each thread on a local memory for lhs input. When private + * memory is used this is set to zero as this is not applicable in case of private memory. + * + * \param rhs_extract_index: determins the position of each thread on a local memory for rhs input. When private + * memory is used this is set to zero as this is not applicable in case of private memory. + * + * \param lhs_scratch_compute : determines the location to load for computation for lhs_local memory. This is the + * same as lhs_scratch_extract for private memory. + * + * \param rhs_scratch_compute : determines the location to load for computation for rhs_local memory. This is the + * same as rhs_scratch_extract for private memory. + */ + struct TiledMemory { + MemHolder lhs_scratch_extract; + MemHolder rhs_scratch_extract; + tile_ptr lhs_scratch_ptr_compute; + tile_ptr rhs_scratch_ptr_compute; + const std::pair lhs_extract_index; + const std::pair rhs_extract_index; + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TiledMemory(const ThreadProperties &, local_ptr, + typename ::Eigen::internal::enable_if::type * = 0) + : lhs_scratch_extract{}, + rhs_scratch_extract{}, + lhs_scratch_ptr_compute(lhs_scratch_extract.ptr), + rhs_scratch_ptr_compute(rhs_scratch_extract.ptr), + lhs_extract_index(std::pair(StorageIndex{0}, StorageIndex{0})), + rhs_extract_index(std::pair(StorageIndex{0}, StorageIndex{0})) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TiledMemory(const ThreadProperties &thread_properties, local_ptr block_start_ptr, + typename ::Eigen::internal::enable_if::type * = 0) + : lhs_scratch_extract{block_start_ptr}, + rhs_scratch_extract{lhs_scratch_extract.ptr + + ((Properties::DoubleBuffer + 1) * LSDL * Properties::TileSizeDimK)}, + lhs_scratch_ptr_compute(lhs_scratch_extract.ptr + thread_properties.mLocalOffset), + rhs_scratch_ptr_compute(rhs_scratch_extract.ptr + thread_properties.nLocalOffset), + lhs_extract_index( + local_id_extract(thread_properties.linearLocalThreadId)), + rhs_extract_index( + local_id_extract(thread_properties.linearLocalThreadId)) {} + }; + + Scratch scratch; + const LhsMapper lhs; + const RhsMapper rhs; + OutAccessor out_res; + const StorageIndex groupSizeM; + const StorageIndex groupSizeN; + const StorageIndex numTiles; + const TripleDim triple_dim; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionKernel(Scratch scratch_, const LhsMapper lhs_, + const RhsMapper rhs_, OutAccessor out_res_, + const StorageIndex groupSizeM_, + const StorageIndex groupSizeN_, + const StorageIndex numTiles_, + const TripleDim triple_dim_) + : scratch(scratch_), + lhs(lhs_), + rhs(rhs_), + out_res(out_res_), + groupSizeM(groupSizeM_), + groupSizeN(groupSizeN_), + numTiles(numTiles_), + triple_dim(triple_dim_) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionKernel(Scratch scratch_, const LhsMapper lhs_, + const RhsMapper rhs_, OutAccessor out_res_, + 
const StorageIndex groupSizeM_, + const StorageIndex numTiles_, + const TripleDim triple_dim_) + : TensorContractionKernel(scratch_, lhs_, rhs_, out_res_, groupSizeM_, 1, numTiles_, triple_dim_) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + const StorageIndex linearLocalThreadId = itemID.get_local_id(0); + const StorageIndex nLocalThreadId = linearLocalThreadId / Properties::LocalThreadSizeM; + const StorageIndex mLocalThreadId = linearLocalThreadId % Properties::LocalThreadSizeM; + const StorageIndex mGroupId = itemID.get_group(0) % groupSizeM; + const StorageIndex tmp = itemID.get_group(0) / groupSizeM; + const StorageIndex nGroupId = IsFinal ? tmp : tmp % groupSizeN; + const StorageIndex kGroupId = IsFinal ? 0 : tmp / groupSizeN; + const StorageIndex mGroupOffset = mGroupId * Properties::TileSizeDimM; + const StorageIndex nGroupOffset = nGroupId * Properties::TileSizeDimN; + const StorageIndex mLocalOffset = PacketSize * mLocalThreadId; + const StorageIndex nLocalOffset = NStride * nLocalThreadId; + const StorageIndex mGlobalOffset = mGroupOffset + mLocalOffset; + const StorageIndex nGlobalOffset = nGroupOffset + nLocalOffset; + + const StorageIndex kSizePerWG = IsFinal ? triple_dim.K : numTiles * Properties::TileSizeDimK; + StorageIndex kGroupOffset = kGroupId * kSizePerWG; + const bool is_internal = triple_dim.M - mGroupOffset >= Properties::TileSizeDimM && + triple_dim.N - nGroupOffset >= Properties::TileSizeDimN && + triple_dim.K - kGroupOffset >= kSizePerWG; + // this is used to adjust the last block + StorageIndex kSize = IsFinal ? triple_dim.K : std::min(kSizePerWG, triple_dim.K - kGroupOffset); + // This is used to find out the lats K offset so that kGroupOffset -kSize can compute the coffset for loading to + // tile + kGroupOffset += kSize; + + auto thread_properties = + ThreadProperties(linearLocalThreadId, kGroupId, mGroupOffset, nGroupOffset, kGroupOffset, + mLocalOffset, nLocalOffset, mGlobalOffset, nGlobalOffset, kSize, is_internal); + + auto out_ptr = out_res.get_pointer() + (IsFinal ? 0 : thread_properties.kGroupId * triple_dim.M * triple_dim.N); + + (thread_properties.is_internal) ? compute_panel(itemID, thread_properties, out_ptr) + : compute_panel(itemID, thread_properties, out_ptr); + } + // The compute block computes the contraction operation private block for each thread and store the resutl in the + // privateRes memory of Each computation the compute block function is independent of local and no local concepts as + // it only compute the block on each thread's private memory space + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_block_per_tile(OutScalar *lhs_block_ptr, OutScalar *rhs_block_ptr, + PacketReturnType *privateRes) { + StorageIndex idx = 0; + EIGEN_CONSTEXPR StorageIndex lhs_stride = + contraction_tp == contraction_type::local ? 
(PacketSize * Properties::LocalThreadSizeM) : 1; + EIGEN_UNROLL_LOOP + for (StorageIndex wLPTN = 0; wLPTN < Properties::WorkLoadPerThreadN; wLPTN++) { + auto rhsPacket = PacketReturnType{*(rhs_block_ptr + wLPTN)}; + StorageIndex lhs_index = 0; + EIGEN_UNROLL_LOOP + for (StorageIndex wLPTM = 0; wLPTM < Properties::WorkLoadPerThreadM / PacketSize; wLPTM++) { + PacketReturnType lhsPack{}; + Eigen::TensorSycl::internal::PacketWrapper::set_packet(lhsPack, + lhs_block_ptr + lhs_index); + privateRes[idx] = ::Eigen::internal::pmadd(lhsPack, rhsPacket, privateRes[idx]); + + lhs_index += lhs_stride; + idx++; + } + } + } + // The store function write the computed contraction operation in the private memory of each thread to the global + // memory. The store function is independent of local and no local concepts s that it can be abstract out in the base + // class. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void store(OutPtr *out_ptr, PacketReturnType *privateRes, + StorageIndex mGlobalOffset, StorageIndex nGlobalOffset) { + auto chk_bound = [&](const StorageIndex &mIndex, const StorageIndex &nIndex) EIGEN_DEVICE_FUNC { + return (mIndex + PacketSize - 1 < triple_dim.M && nGlobalOffset + nIndex < triple_dim.N); + }; + // when local memory is not used M and N are both accessed in a coalesced way. However, when local memory is + // available the k*N is transposed in the local to N*K therefore, each blocks operates on blockId* + // WorkLoadPerThreadN slice of N + EIGEN_CONSTEXPR StorageIndex GlobalNStride = + contraction_tp == contraction_type::local ? 1 : Properties::LocalThreadSizeN; + EIGEN_UNROLL_LOOP + for (StorageIndex wLPTN = 0; wLPTN < Properties::WorkLoadPerThreadN / PrivateNStride; wLPTN++) { + // output leading dimension + StorageIndex outputLD = 0; + // When local memory is used the PrivateNstride is always 1 because the coalesed access on N is loaded into Local + // memory and extracting from local to global is the same as no transposed version. However, when local memory is + // not used and RHS is transposed we packetize the load for RHS. + EIGEN_UNROLL_LOOP + for (StorageIndex nId = 0; nId < PrivateNStride; nId++) { + StorageIndex globalRow = mGlobalOffset; + EIGEN_UNROLL_LOOP + for (StorageIndex wLPTM = 0; wLPTM < Properties::WorkLoadPerThreadM / PacketSize; wLPTM++) { + PacketReturnType privetOut = privateRes[wLPTM]; + if (check_boundary(chk_bound(globalRow, nId))) { + // Store the final results in C. 
The C matrix has always M as a first StorageIndex and N as a second + // StorageIndex Therefore it is always coalesced layout + write(privetOut, out_ptr + outputLD + globalRow); + } else { + EIGEN_UNROLL_LOOP + for (StorageIndex mId = 0; mId < PacketSize; mId++) { + StorageIndex mOffset = globalRow + mId; + if (mOffset < triple_dim.M && (nGlobalOffset + nId < triple_dim.N)) { + out_ptr[mOffset + outputLD] = + Eigen::TensorSycl::internal::PacketWrapper::scalarize(mId, privetOut); + } + } + } + globalRow += (PacketSize * Properties::LocalThreadSizeM); + } + outputLD += triple_dim.M; + privateRes += Properties::WorkLoadPerThreadM / PacketSize; + } + out_ptr += (GlobalNStride * outputLD); + + nGlobalOffset += (PrivateNStride * GlobalNStride); + } + } + // when no local memory is used the following extract_block will be enabled + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + extract_block(const Input &inpt, PrivateReg private_ptr, const std::pair &, + const StorageIndex &ncOffset, const StorageIndex cOffset) { + EIGEN_CONSTEXPR StorageIndex LocalThreadSizeNC = + InputBlockProperties::is_rhs ? Properties::LocalThreadSizeN : Properties::LocalThreadSizeM; + EIGEN_CONSTEXPR StorageIndex WorkLoadPerThreadNC = + InputBlockProperties::is_rhs ? Properties::WorkLoadPerThreadN : Properties::WorkLoadPerThreadM; + const StorageIndex &NC = InputBlockProperties::is_rhs ? triple_dim.N : triple_dim.M; + + auto chk_bound = [&](const StorageIndex &CIndex, const StorageIndex &NCIndex) EIGEN_DEVICE_FUNC { + return ((CIndex + InputBlockProperties::c_stride - 1 < triple_dim.K) && + (NCIndex + InputBlockProperties::nc_stride - 1 < NC)); + }; + const StorageIndex ld = InputBlockProperties::is_coalesced_layout ? NC : triple_dim.K; + StorageIndex cIndex = cOffset; + + EIGEN_UNROLL_LOOP + for (StorageIndex cId = 0; cId < Properties::TileSizeDimK / InputBlockProperties::c_stride; cId++) { + StorageIndex ncIndex = ncOffset; + EIGEN_UNROLL_LOOP + for (StorageIndex ncId = 0; ncId < WorkLoadPerThreadNC / InputBlockProperties::nc_stride; ncId++) { + if (check_boundary(chk_bound(cIndex, ncIndex))) { + auto val = + read(inpt, ncIndex, cIndex, ld); + + write(val, private_ptr); + } else { + EIGEN_UNROLL_LOOP + for (StorageIndex i = 0; i < InputBlockProperties::elements_per_access; i++) { + const StorageIndex ncInd = ncIndex + (InputBlockProperties::is_coalesced_layout ? i : 0); + const StorageIndex cInd = cIndex + (InputBlockProperties::is_coalesced_layout ? 0 : i); + OutScalar val = + (ncInd < NC && cInd < triple_dim.K) + ? read( + inpt, ncInd, cInd, ld) + : OutScalar(0); + write( + val, private_ptr + (InputBlockProperties::is_coalesced_layout ? i : 0) + + ((InputBlockProperties::is_coalesced_layout ? 0 : i) * WorkLoadPerThreadNC)); + } + } + + // if it is lhs we have to load it packetised when the packet size is > 1, because the output is coalesced. So + // even if M is not accessed in a coalesced mode, we have to load packet_size number of m per thread. + ncIndex = (!InputBlockProperties::is_rhs && InputBlockProperties::nc_stride == 1 && PacketSize != 1) + ? 
ncOffset + (ncId + 1) % PacketSize + ((ncId + 1) / PacketSize) * LocalThreadSizeNC + : (ncIndex + InputBlockProperties::nc_stride * LocalThreadSizeNC); + private_ptr += InputBlockProperties::nc_stride; + } + // the previous for loop ( private_ptr += (ncId * nc_stride)) has already moved ptr with one WorkLoadPerThreadNC + private_ptr += (InputBlockProperties::c_stride - 1) * WorkLoadPerThreadNC; + cIndex += InputBlockProperties::c_stride; + } + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::pair local_id_extract( + const StorageIndex &linearLocalThreadId) { + const StorageIndex localThreadNC = + (InputBlockProperties::is_coalesced_layout) + ? linearLocalThreadId % (TileSizeDimNC / InputBlockProperties::nc_stride) + : linearLocalThreadId / (Properties::TileSizeDimK / InputBlockProperties::c_stride); + const StorageIndex localThreadC = + (InputBlockProperties::is_coalesced_layout) + ? linearLocalThreadId / (TileSizeDimNC / InputBlockProperties::nc_stride) + : linearLocalThreadId % (Properties::TileSizeDimK / InputBlockProperties::c_stride); + return std::pair(localThreadNC, localThreadC); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + sync_mem(const cl::sycl::nd_item<1> &, bool &db_offset) noexcept { + db_offset = !db_offset; + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + sync_mem(const cl::sycl::nd_item<1> &itemID, bool &) noexcept { + itemID.barrier(cl::sycl::access::fence_space::local_space); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + sync_mem(const cl::sycl::nd_item<1> &, bool &) noexcept { + return; + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + sync_thread(const cl::sycl::nd_item<1> & +#ifdef EIGEN_SYCL_ARM_GPU_CACHE_OPTIMISATION + itemID +#endif + ) noexcept { +#ifdef EIGEN_SYCL_ARM_GPU_CACHE_OPTIMISATION + itemID.barrier(cl::sycl::access::fence_spacce::local_space); +#else + return; +#endif + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + sync_thread(const cl::sycl::nd_item<1> &itemID) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if::type sync_thread( + const cl::sycl::nd_item<1> &) { + return; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_tile_per_panel(const cl::sycl::nd_item<1> &itemID, + ThreadProperties &thread_properties, + TiledMemory &tiled_input_block, + PacketReturnType *privateRes, bool &db_offset) { + // Tiling the Rhs block from global to local memory + extract_block( + rhs, tiled_input_block.rhs_scratch_extract.ptr + (db_offset * Properties::TileSizeDimK * LSDR), + tiled_input_block.rhs_extract_index, + contraction_tp == contraction_type::local ? thread_properties.nGroupOffset : thread_properties.nGlobalOffset, + thread_properties.kGroupOffset - thread_properties.kSize); + + sync_thread(itemID); + + // Tiling the Lhs block from global to local memory + extract_block( + lhs, tiled_input_block.lhs_scratch_extract.ptr + (db_offset * LSDL * Properties::TileSizeDimK), + tiled_input_block.lhs_extract_index, + contraction_tp == contraction_type::local ? 
thread_properties.mGroupOffset : thread_properties.mGlobalOffset, + thread_properties.kGroupOffset - thread_properties.kSize); + + // itemID.barrier(cl::sycl::access::fence_space::local_space); + sync_thread(itemID); + // switch to compute mede + StorageIndex lhs_offset = (db_offset * LSDL * Properties::TileSizeDimK); + StorageIndex rhs_offset = (db_offset * Properties::TileSizeDimK * LSDR); + // Loop over the values of a single tile + for (StorageIndex k = 0; k < Properties::TileSizeDimK; k++) { + compute_block_per_tile(tiled_input_block.lhs_scratch_ptr_compute + lhs_offset, + tiled_input_block.rhs_scratch_ptr_compute + rhs_offset, privateRes); + lhs_offset += LSDL; + rhs_offset += LSDR; + } + // computing the K index for the next tile + thread_properties.kSize -= Properties::TileSizeDimK; + sync_mem(itemID, db_offset); + } + + // when local memory is available the following compute_panel will be enabled + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_panel(const cl::sycl::nd_item<1> &itemID, + ThreadProperties &thread_properties, + OutPtr out_ptr) { + auto tiled_input_block = TiledMemory{thread_properties, scratch.get_pointer()}; + // Allocate register space + PacketReturnType privateRes[Properties::WorkLoadPerThreadM * Properties::WorkLoadPerThreadN / PacketSize] = { + PacketReturnType{0}}; + bool db_offset = 0; + + while (thread_properties.kSize >= Properties::TileSizeDimK) { + compute_tile_per_panel(itemID, thread_properties, tiled_input_block, privateRes, db_offset); + } + if (thread_properties.kSize > 0) { + compute_tile_per_panel(itemID, thread_properties, tiled_input_block, privateRes, db_offset); + } + + // Storing the final results in the output + store(1) : RHSBlockProperties::nc_stride>( + out_ptr + thread_properties.nGlobalOffset * triple_dim.M, privateRes, thread_properties.mGlobalOffset, + thread_properties.nGlobalOffset); + } + // When local memory is available the following extract_block will be enabled + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ::Eigen::internal::enable_if::type + extract_block(const Input &inpt, Local local_ptr, const std::pair& local_index, + const StorageIndex &ncOffset, const StorageIndex cOffset) { + EIGEN_CONSTEXPR StorageIndex TileSizeDimNC = + InputBlockProperties::is_rhs ? Properties::TileSizeDimN : Properties::TileSizeDimM; + EIGEN_CONSTEXPR StorageIndex LoadPerThread = + InputBlockProperties::is_rhs ? Properties::LoadPerThreadRhs : Properties::LoadPerThreadLhs; + EIGEN_CONSTEXPR StorageIndex LSD = InputBlockProperties::is_rhs ? LSDR : LSDL; + static_assert(((LocalOffset % (TileSizeDimNC / InputBlockProperties::nc_stride) == 0) && + (LocalOffset % (Properties::TileSizeDimK / InputBlockProperties::c_stride) == 0)), + " LocalOffset must be divisable by stride"); + const StorageIndex &NC = InputBlockProperties::is_rhs ? 
triple_dim.N : triple_dim.M; + StorageIndex localThreadNC = local_index.first; + StorageIndex localThreadC = local_index.second; + auto chk_bound = [&](const StorageIndex &CIndex, const StorageIndex &NCIndex) EIGEN_DEVICE_FUNC { + return ((CIndex + InputBlockProperties::c_stride - 1 < triple_dim.K) && + (NCIndex + InputBlockProperties::nc_stride - 1 < NC)); + }; + EIGEN_UNROLL_LOOP + for (StorageIndex lPT = 0; lPT < LoadPerThread / InputBlockProperties::elements_per_access; lPT++) { + const StorageIndex CIndex = cOffset + (InputBlockProperties::c_stride * localThreadC); + const StorageIndex NCIndex = ncOffset + (InputBlockProperties::nc_stride * localThreadNC); + const StorageIndex ld = InputBlockProperties::is_coalesced_layout ? NC : triple_dim.K; + if (check_boundary(chk_bound(CIndex, NCIndex))) { + auto val = + read(inpt, NCIndex, CIndex, ld); + write( + val, local_ptr + (InputBlockProperties::nc_stride * localThreadNC) + + (InputBlockProperties::c_stride * localThreadC * LSD)); + } else { + EIGEN_UNROLL_LOOP + for (StorageIndex i = 0; i < InputBlockProperties::elements_per_access; i++) { + const StorageIndex nCInd = NCIndex + (InputBlockProperties::is_coalesced_layout ? i : 0); + const StorageIndex cInd = CIndex + (InputBlockProperties::is_coalesced_layout ? 0 : i); + OutScalar val = + (nCInd < NC && cInd < triple_dim.K) + ? read( + inpt, nCInd, cInd, ld) + : OutScalar(0); + + write( + val, local_ptr + (InputBlockProperties::nc_stride * localThreadNC) + + (InputBlockProperties::is_coalesced_layout ? i : 0) + + ((InputBlockProperties::c_stride * localThreadC + + (InputBlockProperties::is_coalesced_layout ? 0 : i)) * + LSD)); + } + } + localThreadNC += (InputBlockProperties::is_coalesced_layout) + ? LocalOffset % (TileSizeDimNC / InputBlockProperties::nc_stride) + : LocalOffset / (Properties::TileSizeDimK / InputBlockProperties::c_stride); + localThreadC += (InputBlockProperties::is_coalesced_layout) + ? LocalOffset / (TileSizeDimNC / InputBlockProperties::nc_stride) + : LocalOffset % (Properties::TileSizeDimK / InputBlockProperties::c_stride); + } + } +}; + +#ifndef EIGEN_SYCL_DISABLE_GEMV + +/*! + * \brief GeneralVectorTensor is a template class that provides Tensor -vector contraction operation, which is a special + * case of Tensor Tensor contraction. + * + * \tparam OutScalar: determines the output scalar type + * + * \tparam OutAccessor: determines the sycl accessor type for out put (please see the sycl-1.2.1 specification + * (https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf) for accessor definition) + * + * \tparam VectorMapper: determines the tensor contraction mapper for the vector input (can be lhs or rhs) + * + * \tparam TensorMapper: determines the tensor contraction mapper for the tensor input (can be lhs or rhs) + * + * \tparam StorageIndex: determines the StorageIndex Type + * + * \tparam Properties: determines the Contraction Panel properties + * + * \tparam KFactor: determines the number of elements in K dimension in a Tile + * + * \tparam Vectorizable: determines whether or not the vectorization is enabled for the Eigen expression. + * + * \tparam is_lhs_vec: determines whether lhs is a vector or rhs is a vector + * + * \tparam IsFinal: determine if this is the final kernel. If so, the result will be written in a final output. + * Otherwise, the result of contraction will be written iin a temporary buffer. 
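+ *
+ * Editorial note (not part of the upstream Eigen documentation): when the contracting dimension is split over
+ * several work-groups (IsFinal == false), each contracting group writes one partial result vector of length
+ * nonContractDim into the temporary buffer, and a separate SecondStepPartialReduction kernel afterwards sums
+ * the cNumGroups partial vectors into the final output (see LaunchVT further below).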
+ * + * \param scratch: determines the local memory containing the vector block for each work-group + * + * \param vec: determines the vector input (tensor mapper) + * + * \param mat: determines the tensor input (tensor mapper) + * + * \param out_res: determines the output vector containing the contraction result + * + * \param nonContractGroupSize: a logical number determining the number of work-group for non-contracting dimension + * + * \param nonContractDim: determines the size of non contracting dimension for the flattened tensor + * + * \param contractDim: determines the size of non contracting dimension for the flattened tensor + * + */ +template +struct GeneralVectorTensor { + typedef typename Eigen::TensorSycl::internal::Vectorise::PacketReturnType + PacketReturnType; + static EIGEN_CONSTEXPR int PacketSize = + Eigen::TensorSycl::internal::Vectorise::PacketSize; + typedef cl::sycl::accessor Scratch; + + static EIGEN_CONSTEXPR StorageIndex OutScratchOffset = + KFactor * Properties::LocalThreadSizeC * Properties::LocalThreadSizeNC; + + // Since the access layout for a vector can always be coalesced, when LHS is a vector, we pass false and false to make + // sure that the !^ is true When RHS is a vector, we pass true and true to make sure that the !^ is true. + typedef BlockProperties + VecBlockProperties; + + Scratch scratch; + const VectorMapper vec; + const TensorMapper mat; + OutAccessor out_res; + const StorageIndex nonContractGroupSize; + const StorageIndex nonContractDim; + const StorageIndex contractDim; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE GeneralVectorTensor(Scratch scratch_, const VectorMapper vec_, + const TensorMapper mat_, OutAccessor out_res_, + const StorageIndex nonContractGroupSize_, + const StorageIndex nonContractDim_, + const StorageIndex contractDim_) + : scratch(scratch_), + vec(vec_), + mat(mat_), + out_res(out_res_), + nonContractGroupSize(nonContractGroupSize_), + nonContractDim(nonContractDim_), + contractDim(contractDim_) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + auto scratch_ptr = scratch.get_pointer(); + const StorageIndex linearLocalThreadId = itemID.get_local_id(0); + StorageIndex nonContractId = is_lhs_vec ? linearLocalThreadId / Properties::LocalThreadSizeC + : linearLocalThreadId % Properties::LocalThreadSizeNC; + StorageIndex contractId = is_lhs_vec ? linearLocalThreadId % Properties::LocalThreadSizeC + : linearLocalThreadId / Properties::LocalThreadSizeNC; + const StorageIndex cGroupSize = itemID.get_group_range(0) / nonContractGroupSize; + const StorageIndex nonContractGroupId = + is_lhs_vec ? itemID.get_group(0) / cGroupSize : itemID.get_group(0) % nonContractGroupSize; + const StorageIndex contractGroupId = + is_lhs_vec ? itemID.get_group(0) % cGroupSize : itemID.get_group(0) / nonContractGroupSize; + auto out_ptr = out_res.get_pointer() + (IsFinal ? 
0 : contractGroupId * nonContractDim); + + const StorageIndex nonContractGroupOffset = nonContractGroupId * Properties::TileSizeDimNC; + const StorageIndex contractGroupOffset = contractGroupId * Properties::TileSizeDimC; + auto outScratchIndex = nonContractId + contractId * Properties::LocalThreadSizeNC; + const StorageIndex globalNonContractDimOffset = nonContractGroupOffset + nonContractId; + const StorageIndex globalContractDimOffset = contractGroupOffset + contractId; + auto local_output = scratch_ptr + OutScratchOffset; + const bool is_internal = nonContractDim - nonContractGroupOffset >= Properties::TileSizeDimNC && + contractDim - contractGroupOffset >= Properties::TileSizeDimC; + is_internal + ? compute_panel(itemID, vec, mat, local_output, out_ptr, +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + scratch_ptr, contractGroupOffset, +#endif + nonContractGroupOffset, linearLocalThreadId, contractDim, nonContractDim, contractId, + nonContractId, globalContractDimOffset, globalNonContractDimOffset, outScratchIndex) + : compute_panel(itemID, vec, mat, local_output, out_ptr, +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + scratch_ptr, contractGroupOffset, +#endif + nonContractGroupOffset, linearLocalThreadId, contractDim, nonContractDim, contractId, + nonContractId, globalContractDimOffset, globalNonContractDimOffset, outScratchIndex); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_panel( + const cl::sycl::nd_item<1> &itemID, const VectorMapper &vec, const TensorMapper &mat, OutScalar *local_output, + OutPtr out_ptr, +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + OutScalar *scratch_ptr, const StorageIndex contractGroupOffset, +#endif + const StorageIndex nonContractGroupOffset, const StorageIndex linearLocalThreadId, StorageIndex contractDim, + StorageIndex nonContractDim, StorageIndex contractId, StorageIndex nonContractId, + StorageIndex globalContractDimOffset, StorageIndex globalNonContractDimOffset, StorageIndex outScratchIndex) { + OutScalar outScalar[Properties::WorkLoadPerThreadNC] = {OutScalar(0)}; + // Reading the vector +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + const StorageIndex vectorOffset = contractGroupOffset + linearLocalThreadId; + extract_block(vec, scratch_ptr, linearLocalThreadId, + vectorOffset, contractDim); + + itemID.barrier(cl::sycl::access::fence_space::local_space); + auto in_scratch_ptr = scratch_ptr + contractId; +#endif + + StorageIndex privateOffsetC = 0; + EIGEN_UNROLL_LOOP + for (StorageIndex i = 0; i < Properties::WorkLoadPerThreadC; i++) { + StorageIndex privateOffsetNC = 0; + bool contract_conds = ((globalContractDimOffset + privateOffsetC) < contractDim); +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + auto vecScalar = *in_scratch_ptr; +#else + auto vecScalar = (check_boundary(contract_conds)) + ? vec(is_lhs_vec ? StorageIndex(0) : globalContractDimOffset + privateOffsetC, + is_lhs_vec ? globalContractDimOffset + privateOffsetC : StorageIndex(0)) + : OutScalar(0); +#endif + EIGEN_UNROLL_LOOP + for (StorageIndex j = 0; j < Properties::WorkLoadPerThreadNC; j++) { + auto matScalar = (check_boundary( + contract_conds && ((globalNonContractDimOffset + privateOffsetNC) < nonContractDim))) + ? mat(is_lhs_vec ? globalContractDimOffset + privateOffsetC + : globalNonContractDimOffset + privateOffsetNC, + is_lhs_vec ? 
globalNonContractDimOffset + privateOffsetNC + : globalContractDimOffset + privateOffsetC) + : OutScalar(0); + + outScalar[j] = cl::sycl::mad(matScalar, vecScalar, outScalar[j]); + privateOffsetNC += Properties::LocalThreadSizeNC; + } + privateOffsetC += Properties::LocalThreadSizeC; +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + in_scratch_ptr += Properties::LocalThreadSizeC; +#endif + } + + auto out_scratch_ptr = local_output + outScratchIndex; + // Each block of 16*16 element in shared memory should reduce to 16*1 + EIGEN_UNROLL_LOOP + for (StorageIndex j = 0; j < Properties::WorkLoadPerThreadNC; j++) { + *out_scratch_ptr = outScalar[j]; + + out_scratch_ptr += (Properties::LocalThreadSizeNC * Properties::LocalThreadSizeC); + } + if (is_lhs_vec) { + nonContractId = linearLocalThreadId % Properties::LocalThreadSizeNC; + contractId = linearLocalThreadId / Properties::LocalThreadSizeNC; + outScratchIndex = nonContractId + contractId * Properties::LocalThreadSizeNC; + } + + out_scratch_ptr = local_output + outScratchIndex; + EIGEN_UNROLL_LOOP + for (StorageIndex j = 0; j < Properties::WorkLoadPerThreadNC; j++) { + EIGEN_UNROLL_LOOP + for (StorageIndex offset = Properties::LocalThreadSizeC >> 1; offset > 0; offset >>= 1) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (contractId < offset) { + StorageIndex myNeigbourId = (Properties::LocalThreadSizeNC * offset); + *out_scratch_ptr += out_scratch_ptr[myNeigbourId]; + } + } + // moving to next 16 by 16 block + out_scratch_ptr += (Properties::LocalThreadSizeNC * Properties::LocalThreadSizeC); + } + + if (contractId == 0) { + out_scratch_ptr = local_output + nonContractId; + StorageIndex global_final_offset = nonContractGroupOffset + nonContractId; + out_ptr += global_final_offset; + EIGEN_UNROLL_LOOP + for (StorageIndex j = 0; j < Properties::WorkLoadPerThreadNC; j++) { + if (check_boundary(global_final_offset < nonContractDim)) { + auto res = *out_scratch_ptr; + + *out_ptr = res; + out_ptr += Properties::LocalThreadSizeNC; + } + // moving to next 16 by 16 block to ge the next 16 reduced elements + out_scratch_ptr += (Properties::LocalThreadSizeNC * Properties::LocalThreadSizeC); + if (!(is_internal_block)) global_final_offset += Properties::LocalThreadSizeNC; + } + } + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_block(const Input &inpt, Local *local_ptr, + const StorageIndex &linearLocalThreadId, + const StorageIndex &cOffset, const StorageIndex &C) { + local_ptr += InputBlockProperties::c_stride * linearLocalThreadId; + StorageIndex cIndex = cOffset; + for (StorageIndex cId = 0; cId < CFactor / InputBlockProperties::c_stride; cId++) { + if (check_boundary(cIndex + InputBlockProperties::c_stride - 1 < C)) { + auto val = read(inpt, StorageIndex(0), + cIndex, StorageIndex(1)); + write(val, local_ptr); + } else { + EIGEN_UNROLL_LOOP + for (StorageIndex i = 0; i < InputBlockProperties::elements_per_access; i++) { + OutScalar val = + (cIndex + i < C) + ? read( + inpt, StorageIndex(0), cIndex + i, StorageIndex(1)) + : OutScalar(0); + write(val, local_ptr + i); + } + } + local_ptr += InputBlockProperties::c_stride * GroupSize; + cIndex += InputBlockProperties::c_stride * GroupSize; + } + } +}; +#endif + +#ifndef EIGEN_SYCL_DISABLE_SCALAR + +/*! + * \brief GeneralScalarContraction is a template class that provides the scalar value of Tensor -Tensor contraction + * operation, when all the dimensions are contracting dimensions. 
This Kernel reduces two tensors to an scalar + * + * \tparam OutScalar: determines the output scalar type + * + * \tparam LhsScalar: determines the left-hand-side scalar type + * + * \tparam RhsScalar: determines the right-hand-side scalar type + * + * \tparam OutAccessor: determines the sycl accessor type for out put (please see the sycl-1.2.1 specification + * (https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf) for accessor definition) + * + * \tparam LhsMapper: determines the tensor contraction mapper type for left-hand-side matrix + * + * \tparam RhsMapper: determines the tensor contraction mapper type for right-hand-side matrix + * + * \tparam StorageIndex: determines the StorageIndex Type + * + * \tparam Vectorizable: determines whether or not the vectorization is enabled for the Eigen expression. + * + * \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group + * + * \param lhs: determines the left-hand-side flattened tensor (tensor mapper) + * + * \param rhs: determines the right-hand-side flattened tensor (tensor mapper) + * + * \param out_res: determines the output tensor containing the contraction result + * + * \param rng: determins the total input data size + */ +template +struct GeneralScalarContraction { + typedef cl::sycl::accessor Scratch; + Scratch scratch; + const LhsMapper lhs; + const RhsMapper rhs; + OutAccessor out_res; + const StorageIndex rng; + + EIGEN_DEVICE_FUNC + GeneralScalarContraction(Scratch scratch_, const LhsMapper lhs_, const RhsMapper rhs_, OutAccessor out_res_, + const StorageIndex rng_) + : scratch(scratch_), lhs(lhs_), rhs(rhs_), out_res(out_res_), rng(rng_) {} + + EIGEN_DEVICE_FUNC void operator()(cl::sycl::nd_item<1> itemID) { + auto out_ptr = out_res.get_pointer(); + auto scratch_ptr = scratch.get_pointer().get(); + + StorageIndex globalid = itemID.get_global_id(0); + StorageIndex localid = itemID.get_local_id(0); + OutScalar accumulator = OutScalar(0); + for (StorageIndex i = globalid; i < rng; i += itemID.get_global_range(0)) { + accumulator = cl::sycl::mad(lhs(0, i), rhs(i, 0), accumulator); + } + auto out_scratch_ptr = scratch_ptr + localid; + *out_scratch_ptr = accumulator; + for (StorageIndex offset = itemID.get_local_range(0) >> 1; offset > 0; offset >>= 1) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (localid < offset) { + *out_scratch_ptr = (accumulator += out_scratch_ptr[offset]); + } + } + if (localid == 0) { + out_ptr[itemID.get_group(0)] = accumulator; + } + } +}; +#endif + +} // namespace internal +} // namespace TensorSycl + +template +struct TensorEvaluator, + Eigen::SyclDevice> + : public TensorContractionEvaluatorBase, Eigen::SyclDevice>> { + static_assert(std::is_same::value, + "SYCL tensor contraction does not support output kernels."); + + typedef Eigen::SyclDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index StorageIndex; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Base::Storage Storage; + typedef typename Base::EvaluatorPointerType EvaluatorPointerType; + struct TripleDim { + const StorageIndex M; + const StorageIndex N; + const StorageIndex K; + TripleDim(const StorageIndex M_, const StorageIndex N_, const StorageIndex K_) : M(M_), N(N_), K(K_) {} + }; + enum { + Layout = 
TensorEvaluator::Layout, + PacketAccess = (PacketType::size > 1), + BlockAccess = false, + }; + + static EIGEN_CONSTEXPR int LDims = Base::LDims; + static EIGEN_CONSTEXPR int RDims = Base::RDims; + static EIGEN_CONSTEXPR int ContractDims = Base::ContractDims; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + typedef typename Eigen::internal::remove_const::type LhsScalar; + typedef typename Eigen::internal::remove_const::type RhsScalar; + + typedef typename LeftEvaluator::Dimensions LeftDimensions; + typedef typename RightEvaluator::Dimensions RightDimensions; + + template + struct input_mapper_propertis { + static EIGEN_CONSTEXPR bool is_lhs_matrix = (LDims == 2 && ContractDims == 1) || lhs_inner_dim_contiguous; + static EIGEN_CONSTEXPR bool is_rhs_matrix = + (RDims == 2 && ContractDims == 1) || (rhs_inner_dim_contiguous && !rhs_inner_dim_reordered); + }; + + TensorEvaluator(const XprType &op, const Device &device) : Base(op, device) {} + + // We need to redefine this method to make nvcc happy + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(typename Base::EvaluatorPointerType data) { + this->m_leftImpl.evalSubExprsIfNeeded(NULL); + this->m_rightImpl.evalSubExprsIfNeeded(NULL); + if (!data) { + this->m_result = this->m_device.get( + static_cast(this->m_device.allocate_temp(this->dimensions().TotalSize() * sizeof(Scalar)))); + data = this->m_result; + } + evalToSycl(data); + return (this->m_result != NULL); + } + const Eigen::SyclDevice &device() const { return this->m_device; } + void evalToSycl(typename Base::EvaluatorPointerType buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } else { + evalTyped(buffer); + } + } else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } else { + evalTyped(buffer); + } + } + } else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } else { + evalTyped(buffer); + } + } else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } else { + evalTyped(buffer); + } + } + } + } + + template + void evalTyped(typename Base::EvaluatorPointerType buffer) const { + const auto triple_dim = TripleDim{this->m_i_size, this->m_j_size, this->m_k_size}; + typedef internal::TensorContractionInputMapper< + LhsScalar, StorageIndex, internal::Lhs, LeftEvaluator, left_nocontract_t, contract_t, + PacketType::size, lhs_inner_dim_contiguous, false, Unaligned, MakeSYCLPointer> + LhsMapper; + + typedef internal::TensorContractionInputMapper::size, rhs_inner_dim_contiguous, + rhs_inner_dim_reordered, Unaligned, MakeSYCLPointer> + RhsMapper; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + +#ifndef EIGEN_SYCL_DISABLE_SCALAR + if (triple_dim.M == 1 && triple_dim.N == 1) { + launchSC(buffer, lhs, rhs, triple_dim.K); + } else +#endif +#ifndef EIGEN_SYCL_DISABLE_GEMV + if (triple_dim.M != 1 && triple_dim.N == 1) { + 
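+      // Editorial note (not part of the upstream Eigen sources): with N == 1 the right-hand side degenerates
+      // to a vector, so the problem is dispatched to the matrix-vector kernel with rhs as the vector operand
+      // and lhs as the tensor operand; the M == 1 branch below mirrors this with the operands swapped.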
LaunchVT(buffer, rhs, lhs, triple_dim.M, triple_dim.K); + } else if (triple_dim.M == 1 && triple_dim.N != 1) { + LaunchVT(buffer, lhs, rhs, triple_dim.N, triple_dim.K); + } else // This is equivalent of if (m!=1 && n!=1) +#endif + { + typedef input_mapper_propertis + inpt_mapper_properties; +#ifndef EIGEN_SYCL_DISABLE_SKINNY + bool skinny = false; + auto platform_name = this->device().getPlatformName(); + // This is based on empirical calculation for AMD r9-nano and Fiji + if (platform_name.find("AMD") == 0) { + skinny = (triple_dim.M < triple_dim.K || triple_dim.N < triple_dim.K) && + ((triple_dim.M < 1024 && triple_dim.N < 1024) || + (uint64_t(triple_dim.M * triple_dim.N) < uint64_t(triple_dim.K))); + } else { + skinny = (((std::max(triple_dim.K, triple_dim.N) / std::min(triple_dim.K, triple_dim.N)) > 100) || + ((std::max(triple_dim.K, triple_dim.M) / std::min(triple_dim.K, triple_dim.M)) > 100) || + ((std::max(triple_dim.N, triple_dim.M) / std::min(triple_dim.N, triple_dim.M)) > 100)); + } + if (skinny) + adjustTT(buffer, lhs, rhs, triple_dim); + else +#endif // EIGEN_SYCL_DISABLE_SKINNY + adjustTT(buffer, lhs, rhs, triple_dim); + } + } + + template + void EIGEN_ALWAYS_INLINE adjustTT(EvaluatorPointerType buffer, const LhsMapper &lhs, const RhsMapper &rhs, + const TripleDim &triple_dim) const { +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON + if (device().has_local_memory()) { + typedef TensorSycl::internal::TTPanelSize PanelParameters; + launchTT( + buffer, lhs, rhs, triple_dim); + } +#endif +#ifdef EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF + if (!(device().has_local_memory())) { + typedef TensorSycl::internal::TTPanelSize PanelParameters; + launchTT( + buffer, lhs, rhs, triple_dim); + } +#endif + } + + template + void launchTT(EvaluatorPointerType buffer, const LhsMapper &lhs, const RhsMapper &rhs, + const TripleDim &triple_dim) const { + const StorageIndex roundUpM = Eigen::TensorSycl::internal::roundUp(triple_dim.M, Properties::TileSizeDimM); + const StorageIndex roundUpN = Eigen::TensorSycl::internal::roundUp(triple_dim.N, Properties::TileSizeDimN); + const StorageIndex groupSizeM = roundUpM / Properties::TileSizeDimM; + const StorageIndex groupSizeN = roundUpN / Properties::TileSizeDimN; + + const StorageIndex roundUpK = Eigen::TensorSycl::internal::roundUp(triple_dim.K, Properties::TileSizeDimK); + StorageIndex totalTilesK = roundUpK / Properties::TileSizeDimK; + StorageIndex groupSizeK = + skinny + ? std::max(std::min(totalTilesK, + (StorageIndex)(device().getPowerOfTwo(device().getNumSyclMultiProcessors(), true) * 4) / + (groupSizeM * groupSizeN)), + StorageIndex(1)) + : StorageIndex(1); + + const StorageIndex numTilesPerGroup = Eigen::TensorSycl::internal::roundUp(totalTilesK, groupSizeK) / groupSizeK; + + const StorageIndex totalGroupSize = groupSizeM * groupSizeN * groupSizeK; + + const StorageIndex localRange = Properties::LocalThreadSizeM * Properties::LocalThreadSizeN; + const StorageIndex globalRange = totalGroupSize * localRange; + + const StorageIndex scratchSize = (ct == TensorSycl::internal::contraction_type::local) + ? 
((Properties::DoubleBuffer + 1) * + (Properties::TileSizeDimM + Properties::BC) * (Properties::TileSizeDimK)) + + ((Properties::DoubleBuffer + 1) * (Properties::TileSizeDimK) * + (Properties::TileSizeDimN + Properties::BC)) + : StorageIndex(1); + + auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange)); + if (groupSizeK == 1) { + typedef TensorSycl::internal::TensorContractionKernel + ContractKernelName; + device().template binary_kernel_launcher( + lhs, rhs, buffer, thread_range, scratchSize, groupSizeM, groupSizeN, numTilesPerGroup, triple_dim); + } else { + typedef TensorSycl::internal::TensorContractionKernel + ContractKernelName; + CoeffReturnType *temp_pointer = static_cast( + device().allocate_temp(triple_dim.M * triple_dim.N * groupSizeK * sizeof(CoeffReturnType))); + EvaluatorPointerType tmp_global_accessor = device().get(temp_pointer); + + device().template binary_kernel_launcher( + lhs, rhs, tmp_global_accessor, thread_range, scratchSize, groupSizeM, groupSizeN, numTilesPerGroup, + triple_dim); + + typedef Eigen::internal::SumReducer Op; + auto op = Op(); + typedef TensorSycl::internal::SecondStepPartialReduction + ReductionKernel; + + device().template unary_kernel_launcher( + tmp_global_accessor, buffer, + cl::sycl::nd_range<1>(cl::sycl::range<1>(StorageIndex( + Eigen::TensorSycl::internal::roundUp(triple_dim.M * triple_dim.N, localRange))), + cl::sycl::range<1>(localRange)), + StorageIndex(1), op, StorageIndex(triple_dim.M * triple_dim.N), groupSizeK); + + device().deallocate_temp(temp_pointer); + } + } + +#ifndef EIGEN_SYCL_DISABLE_GEMV + template + void EIGEN_ALWAYS_INLINE LaunchVT(EvaluatorPointerType buffer, const VectorMapper &vec, const TensorMapper &mat, + StorageIndex NC, StorageIndex C) const { + const StorageIndex nonContractDim = NC; + EIGEN_CONSTEXPR StorageIndex NCFactor = 1; + EIGEN_CONSTEXPR StorageIndex CFactor = 1; + EIGEN_CONSTEXPR StorageIndex NCWindow = 16; + typedef Eigen::TensorSycl::internal::TVPanelSize + Properties; + const StorageIndex roundUpC = Eigen::TensorSycl::internal::roundUp(C, Properties::TileSizeDimC); + const StorageIndex cNumGroups = roundUpC / (Properties::LocalThreadSizeC * Properties::WorkLoadPerThreadC); + const StorageIndex roundUpNC = Eigen::TensorSycl::internal::roundUp(nonContractDim, Properties::TileSizeDimNC); + const StorageIndex nCNumGroups = roundUpNC / (Properties::LocalThreadSizeNC * Properties::WorkLoadPerThreadNC); + const StorageIndex globalRange = + (roundUpNC / (Properties::WorkLoadPerThreadNC)) * (roundUpC / (Properties::WorkLoadPerThreadC)); + const StorageIndex localRange = Properties::LocalThreadSizeNC * Properties::LocalThreadSizeC; + const StorageIndex scratchSize = + (Properties::WorkLoadPerThreadNC + CFactor) * Properties::LocalThreadSizeC * Properties::LocalThreadSizeNC; + auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange)); + if (cNumGroups > 1) { + typedef Eigen::TensorSycl::internal::GeneralVectorTensor + ContractKernelName; + CoeffReturnType *temp_pointer = + static_cast(device().allocate_temp(nonContractDim * cNumGroups * sizeof(CoeffReturnType))); + EvaluatorPointerType tmp_global_accessor = device().get(temp_pointer); + + device().template binary_kernel_launcher( + vec, mat, tmp_global_accessor, thread_range, scratchSize, nCNumGroups, nonContractDim, C); + + typedef Eigen::internal::SumReducer Op; + typedef TensorSycl::internal::SecondStepPartialReduction + ReductionKernel; + + device().template 
unary_kernel_launcher( + tmp_global_accessor, buffer, + cl::sycl::nd_range<1>(cl::sycl::range<1>(Eigen::TensorSycl::internal::roundUp(nonContractDim, localRange)), + cl::sycl::range<1>(localRange)), + StorageIndex(1), Op(), nonContractDim, cNumGroups); + + device().deallocate_temp(temp_pointer); + } else { + typedef Eigen::TensorSycl::internal::GeneralVectorTensor + ContractKernelName; + device().template binary_kernel_launcher( + vec, mat, buffer, thread_range, scratchSize, nCNumGroups, nonContractDim, C); + } + } +#endif + +#ifndef EIGEN_SYCL_DISABLE_SCALAR + template + EIGEN_ALWAYS_INLINE void launchSC(EvaluatorPointerType buffer, const LhsMapper &lhs, const RhsMapper &rhs, + StorageIndex K) const { + EIGEN_STATIC_ASSERT(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) & + (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)), + "The Local thread size must be a power of 2 for the reduction " + "operation"); + EIGEN_CONSTEXPR StorageIndex local_range = EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1; + + // Here we force the code not to be more than 2-step reduction: Our empirical research shows that if each thread + // reduces at least 512 elementss individually, we get better performance. + const StorageIndex num_work_group = ((K + (512 * local_range - 1)) / (512 * local_range) > 1 ? local_range : 1); + const StorageIndex global_range = num_work_group * local_range; + + typedef Eigen::TensorSycl::internal::GeneralScalarContraction< + CoeffReturnType, LhsScalar, RhsScalar, EvaluatorPointerType, LhsMapper, RhsMapper, StorageIndex, false> + ContractKernelName; + auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range)); + if (num_work_group > 1) { + CoeffReturnType *temp_pointer = + static_cast(device().allocate_temp(num_work_group * sizeof(CoeffReturnType))); + EvaluatorPointerType tmp_global_accessor = device().get(temp_pointer); + device().template binary_kernel_launcher(lhs, rhs, tmp_global_accessor, + thread_range, local_range, K); + typedef Eigen::internal::SumReducer Op; + typedef TensorSycl::internal::SecondStepFullReducer + GenericRKernel; + device().template unary_kernel_launcher( + tmp_global_accessor, buffer, + cl::sycl::nd_range<1>(cl::sycl::range<1>(local_range), cl::sycl::range<1>(local_range)), local_range, Op()); + + device().deallocate_temp(temp_pointer); + } else { + device().template binary_kernel_launcher(lhs, rhs, buffer, thread_range, + local_range, K); + } + } +#endif + + EIGEN_STRONG_INLINE void cleanup() { + this->m_leftImpl.cleanup(); + this->m_rightImpl.cleanup(); + + if (this->m_result) { + this->m_device.deallocate_temp(this->m_result); + this->m_result = NULL; + } + } + // The placeholder accessors must bound to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + this->m_leftImpl.bind(cgh); + this->m_rightImpl.bind(cgh); + this->m_result.bind(cgh); + } +}; +} // namespace Eigen +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_SYCL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h new file mode 100644 index 0000000..21be6ea --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -0,0 +1,1679 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H + +// evaluator for thread pool device +#ifdef EIGEN_USE_THREADS + +namespace Eigen { + +template +struct TensorEvaluator, ThreadPoolDevice> : + public TensorContractionEvaluatorBase, ThreadPoolDevice> > { + + typedef ThreadPoolDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef typename internal::gebp_traits Traits; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) {} + + template + void evalProduct(Scalar* buffer) const { + evalProductImpl(buffer, NoCallback()); + } + + template + void evalProductAsync(Scalar* buffer, EvalToCallback done) const { + evalProductImpl(buffer, std::move(done)); + } + + template + void evalProductImpl(Scalar* buffer, DoneCallback done) const { + // This function computes a lot of heuristics in multiple steps, and it + // also has multiple exit points. To keep it sane, readable and all in one + // place, sync/async execution decision is made at runtime at the very end. + // + // (1) In sync mode we allocate Context on the stack, submit computations + // to the device thread pool, and block on a barrier until it is + // completed. + // + // (2) In async mode we allocate Context on the heap, and after all tasks + // are finished, we call provided the done callback, and delete a + // context from the heap. + // + // (*) EvalParallelContext & EvalShardedByInnerDimContext owns all the state + // and temporary buffers, requried for executing the tensor contraction. + // They are responsible for cleaning it up after contraction is done. 
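+    //
+    // [Editor's note: illustrative sketch, not part of the upstream Eigen source]
+    // Assuming the evaluator entry points declared earlier in this class, the two
+    // modes described above are reached roughly as follows:
+    //
+    //   // Sync mode (DoneCallback == NoCallback): stack-allocated context,
+    //   // the calling thread blocks on a barrier until the contraction is done.
+    //   evalProduct<Alignment>(buffer);
+    //
+    //   // Async mode: heap-allocated context that deletes itself and then
+    //   // invokes the user-provided callback.
+    //   evalProductAsync<DoneCallback, Alignment>(buffer, std::move(done));
+    //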
+ static const bool IsEvalInSyncMode = + std::is_same::value; + + const Index m = this->m_i_size; + const Index n = this->m_j_size; + const Index k = this->m_k_size; + if (m == 0 || n == 0 || k == 0) return; + + // Compute a set of algorithm parameters: + // - kernel block sizes (bm, bn, bk) + // - task grain sizes (number of kernels executed per task: gm, gn) + // - number of threads + // - sharding by row/column + // - parallel packing or first lhs then rhs + // and some derived parameters: + // - number of tasks (nm, nn, nk) + // - number of kernels (nm0, nn0) + // Unfortunately, all these parameters are tightly interdependent. + // So in some cases we first compute approximate values, then compute other + // values based on these approximations and then refine the approximations. + + // There are lots of heuristics here. There is some reasoning behind them, + // but ultimately they are just tuned on contraction benchmarks for + // different input configurations, thread counts and instruction sets. + // So feel free to question any of them. + + // Compute whether we want to shard by row or by column. + // This is a first approximation, it will be refined later. Since we don't + // know number of threads yet we use 2, because what's we are most + // interested in at this point is whether it makes sense to use + // parallelization at all or not. + bool shard_by_col = shardByCol(m, n, 2); + + // First approximation of kernel blocking sizes. + // Again, we don't know number of threads yet, so we use 2. + Index bm, bn, bk; + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Compute optimal number of threads. + // Note: we use bk instead of k here because we are interested in amount of + // _parallelizable_ computations, and computations are not parallelizable + // across k dimension. + const TensorOpCost cost = + contractionCost(m, n, bm, bn, bk, shard_by_col, false); + int num_threads = TensorCostModel::numThreads( + static_cast(n) * m, cost, this->m_device.numThreads()); + int num_threads_by_k = numThreadsInnerDim(m, n, k); + if (shardByInnerDim(m, n, k, num_threads, num_threads_by_k)) { + // We are in the scenario where it is more effective to shard by the + // inner dimension. + if (IsEvalInSyncMode) { + EvalShardedByInnerDimContext ctx( + this, num_threads_by_k, buffer, m, n, k, std::move(done)); + ctx.template run(); + } else { + auto* ctx = new EvalShardedByInnerDimContext( + this, num_threads_by_k, buffer, m, n, k, std::move(done)); + ctx->template runAsync(); + } + + return; + } + + // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost + // model is not tuned. Remove this when the cost model is tuned. + if (n == 1) num_threads = 1; + + if (num_threads == 1) { + TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, + Unaligned, (buffer)); + if (!IsEvalInSyncMode) done(); + return; + } + + // Now that we know number of threads, recalculate sharding and blocking. 
+ shard_by_col = shardByCol(m, n, num_threads); + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Number of kernels for each dimension. + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index nk = divup(k, bk); + + // Calculate task grain size (number of kernels executed per task). + // This task size coarsening serves two purposes: + // 1. It reduces per-task overheads including synchronization overheads. + // 2. It allows to use caches better (reuse the same packed rhs in several + // consecutive kernels). + Index gm = 1; + Index gn = 1; + // If we are sharding by column, then we prefer to reduce rows first. + if (shard_by_col) { + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + } else { + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + } + // Number of tasks in each dimension. + Index nm = divup(nm0, gm); + Index nn = divup(nn0, gn); + + // If there is enough concurrency in the sharding dimension, we choose not + // to paralellize by the other dimension, and execute all kernels in sync + // mode. This reduces parallelism from the nm x nn down to nn + // (shard_by_col==true) or nm (shard_by_col==false). + const Index sharding_dim_tasks = shard_by_col ? nn : nm; + const int num_worker_threads = this->m_device.numThreadsInPool(); + + // With small number of threads we want to make sure that we do not reduce + // parallelism too much. With large number of threads we trade maximum + // parallelism for better memory locality. + const float oversharding_factor = + num_worker_threads <= 4 ? 8.0 : + num_worker_threads <= 8 ? 4.0 : + num_worker_threads <= 16 ? 2.0 : + num_worker_threads <= 32 ? 1.0 : + num_worker_threads <= 64 ? 0.8 : /* num_worker_threads > 64 */ 0.6; + + const bool parallelize_by_sharding_dim_only = + sharding_dim_tasks >= oversharding_factor * num_worker_threads; + + // Last by not least, decide whether we want to issue both lhs and rhs + // packing in parallel; or issue lhs packing first, and then issue rhs + // packing when lhs packing completes (for !shard_by_col lhs and rhs are + // swapped). Parallel packing allows more parallelism (for both packing and + // kernels), while sequential packing provides better locality (once + // a thread finishes rhs packing it proceed to kernels with that rhs). + // First, we are interested in parallel packing if there are few tasks. + bool parallel_pack = num_threads >= nm * nn; + // Also do parallel packing if all data fits into L2$. + if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= + l2CacheSize() * num_threads) + parallel_pack = true; + // But don't do it if we will use each rhs only once. Locality seems to be + // more important in this case. + if ((shard_by_col ? nm : nn) == 1) parallel_pack = false; + // Also don't get in the way of parallelize_by_sharding_dim_only + // optimization. + if (parallelize_by_sharding_dim_only) parallel_pack = false; + + // TODO(ezhulnev): With if contexpr we don't need SyncEvalParallelContext. 
+ if (IsEvalInSyncMode) { +#define CONTEXT_ARGS \ + (this, num_threads, buffer, m, n, k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, \ + nn0, shard_by_col, parallel_pack, parallelize_by_sharding_dim_only, \ + NoCallback()) \ + .run() + TENSOR_CONTRACTION_DISPATCH(SyncEvalParallelContext, Alignment, + CONTEXT_ARGS); +#undef CONTEXT_ARGS + + } else { +#define CONTEXT_ARGS \ + (this, num_threads, buffer, m, n, k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, \ + nn0, shard_by_col, parallel_pack, parallelize_by_sharding_dim_only, \ + std::move(done)) + TENSOR_CONTRACTION_ASYNC_DISPATCH(EvalParallelContext, DoneCallback, + Alignment, CONTEXT_ARGS, run()); +#undef CONTEXT_ARGS + } + } + + // ------------------------------------------------------------------------ // + + // Dummy struct to represent an empty DoneCallback. + + struct NoCallback { + void operator()() { + eigen_assert(false && "NoCallback should never be called"); + } + }; + + // ------------------------------------------------------------------------ // + + template + class EvalParallelNotification; + + // Synchronous evaluation notification that blocks caller thread in Wait(). + template + class EvalParallelNotification { + public: + EvalParallelNotification(Context*, NoCallback) {} + void Notify() { done_.Notify(); } + void Wait() { done_.Wait(); } + private: + Eigen::Notification done_; + }; + + // Asynchronous evaluation notification that does not block in Wait(). + template + class EvalParallelNotification { + public: + EvalParallelNotification(Context* ctx, DoneCallback done) + : ctx_(ctx), done_(std::move(done)) {} + + void Notify() { + // Make a copy of done callback, because it will be destructed when we + // will delete context in the next line (EvalParallelNotification is a + // data member of EvalParallelContext class). + DoneCallback done_copy = std::move(done_); + + // Delete parallel evaluation context. + delete ctx_; + + // Now safely call the done callback. + done_copy(); + } + + void Wait() {} + + private: + Context* ctx_; + DoneCallback done_; + }; + + // Context orchestrates sync/async parallel contraction evaluation. When it is + // executed in asynchronous mode, it owns all the shared state that might be + // accessible by block packing and kernel tasks. 
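+  //
+  // [Editor's note: illustrative sketch, not upstream code] Rough lifecycle of the
+  // context as driven by the dispatch above (template parameters abbreviated):
+  //
+  //   EvalParallelContext<...> ctx(this, num_threads, buffer, m, n, k,
+  //                                bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0,
+  //                                shard_by_col, parallel_pack,
+  //                                parallelize_by_sharding_dim_only, NoCallback());
+  //   ctx.run();  // signal_switch(0, 1) starts packing of the first k slice,
+  //               // then done_.Wait() blocks until the last task notifies.
+  //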
+ + template + class EvalParallelContext { + public: + typedef internal::TensorContractionInputMapper< + LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, + contract_t, internal::packet_traits::size, + lhs_inner_dim_contiguous, false, Unaligned> + LhsMapper; + typedef internal::TensorContractionInputMapper< + RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, + contract_t, internal::packet_traits::size, + rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> + RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + typedef internal::TensorContractionKernel< + Scalar, LhsScalar, RhsScalar, Index, OutputMapper, LhsMapper, RhsMapper> + TensorContractionKernel; + + typedef typename TensorContractionKernel::LhsBlock LhsBlock; + typedef typename TensorContractionKernel::RhsBlock RhsBlock; + typedef typename TensorContractionKernel::BlockMemHandle BlockMemHandle; + + EvalParallelContext(const Self* self, int num_threads, Scalar* buffer, + Index tm, Index tn, Index tk, Index bm, Index bn, + Index bk, Index nm, Index nn, Index nk, Index gm, + Index gn, Index nm0, Index nn0, bool shard_by_col, + bool parallel_pack, + bool parallelize_by_sharding_dim_only, + DoneCallback done) + : created_by_thread_id_(std::this_thread::get_id()), + done_(this, std::move(done)), + device_(self->m_device), + lhs_(self->m_leftImpl, self->m_left_nocontract_strides, + self->m_i_strides, self->m_left_contracting_strides, + self->m_k_strides), + rhs_(self->m_rightImpl, self->m_right_nocontract_strides, + self->m_j_strides, self->m_right_contracting_strides, + self->m_k_strides), + buffer_(buffer), + output_(buffer, tm), + output_kernel_(self->m_output_kernel), + tensor_contraction_params_(self->m_tensor_contraction_params), + num_threads_(num_threads), + shard_by_col_(shard_by_col), + parallel_pack_(parallel_pack), + parallelize_by_sharding_dim_only_(parallelize_by_sharding_dim_only), + m_(tm), + n_(tn), + k_(tk), + bm_(bm), + bn_(bn), + bk_(bk), + nm_(nm), + nn_(nn), + nk_(nk), + gm_(gm), + gn_(gn), + nm0_(nm0), + nn0_(nn0), + kernel_(m_, k_, n_, bm_, bk_, bn_), + num_thread_local_allocations_(0), + // We reserve 2X more capacity for a thread local values, than the + // number of threads in the pool to efficiently handle task stealing + // by threads that are not managed by the pool. + thread_local_capacity(2 * (parallelize_by_sharding_dim_only_ + ? device_.numThreadsInPool() + : 0)), + // We will use only one of the Lhs/Rhs thread local storage depending + // on the shard_by_col value and we parallelize by sharding dim ONLY. + lhs_thread_local_blocks_(shard_by_col_ ? 0 : thread_local_capacity, + {*this}, {*this}), + rhs_thread_local_blocks_(shard_by_col_ ? thread_local_capacity : 0, + {*this}, {*this}) { + // These two options are mutually exclusive. + eigen_assert(!(parallel_pack && parallelize_by_sharding_dim_only)); + + for (Index x = 0; x < P; x++) { + // Normal number of notifications for k slice switch is + // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only + // nm_ + nn_ notifications, because they will not receive notifications + // from preceding kernels. + state_switch_[x] = + x == 0 + ? 1 + : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) + + (x == P - 1 ? nm_ * nn_ : 0); + state_packing_ready_[x] = + parallel_pack_ ? 0 : (shard_by_col_ ? 
nm_ : nn_); + state_kernel_[x] = new std::atomic*[nm_]; + for (Index m = 0; m < nm_; m++) { + state_kernel_[x][m] = new std::atomic[nn_]; + // Kernels generally receive 3 notifications (previous kernel + 2 + // packing), but the first slice won't get notifications from previous + // kernels. + for (Index n = 0; n < nn_; n++) + state_kernel_[x][m][n].store( + (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1), + std::memory_order_relaxed); + } + } + + // Allocate memory for packed rhs/lhs matrices. + packed_mem_ = kernel_.allocateSlices( // + device_, // + /*num_lhs=*/nm0_, // + /*num_rhs=*/nn0_, // + /*num_slices=*/std::min(nk_, P - 1), // + packed_lhs_, packed_rhs_); + + if (parallelize_by_sharding_dim_only_) { + const int num_worker_threads = device_.numThreadsInPool(); + + if (shard_by_col) { + can_use_thread_local_packed_ = new std::atomic[nn_]; + for (int i = 0; i < nn_; ++i) + can_use_thread_local_packed_[i].store(true, + std::memory_order_relaxed); + + Index num_blocks = num_worker_threads * gn_; + thread_local_pre_alocated_mem_ = kernel_.allocateSlices( // + device_, // + /*num_lhs=*/0, // + /*num_rhs=*/num_blocks, // + /*num_slices=*/1, // + /*lhs_blocks=*/nullptr, &rhs_thread_local_pre_allocated_); + + } else { + can_use_thread_local_packed_ = new std::atomic[nm_]; + for (int i = 0; i < nm_; ++i) + can_use_thread_local_packed_[i].store(true, + std::memory_order_relaxed); + + Index num_blocks = num_worker_threads * gm_; + thread_local_pre_alocated_mem_ = kernel_.allocateSlices( // + device_, // + /*num_lhs=*/num_blocks, // + /*num_rhs=*/0, // + /*num_slices=*/1, &lhs_thread_local_pre_allocated_, // + /*rhs_blocks=*/nullptr); + } + } + } + + ~EvalParallelContext() { + for (Index x = 0; x < P; x++) { + for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; + delete[] state_kernel_[x]; + } + kernel_.deallocate(device_, packed_mem_); + if (parallelize_by_sharding_dim_only_) { + kernel_.deallocate(device_, thread_local_pre_alocated_mem_); + delete[] can_use_thread_local_packed_; + } + } + + void run() { + // Kick off packing of the first slice. + signal_switch(0, 1); + + // Wait for overall completion. + // + // If parallel evaluation is executed in async mode, this is a no-op, and + // Wait() will return immediately. In synchronous mode it will block the + // caller thread until it will receive notification from last task. + // + // In async mode, last task when completed will call done callback from + // the same thread, and will delete this context. + // + // TODO(dvyukov): This wait can lead to deadlock if contraction is + // evaluated in synchronous mode. If nthreads contractions are + // concurrently submitted from worker threads, this wait will block all + // worker threads and the system will deadlock. + done_.Wait(); + } + + private: + std::thread::id created_by_thread_id_; + + // This notification is specialized on the type of DoneCallback and can be + // blocking or non-blocking. + EvalParallelNotification done_; + + const Device& device_; + LhsMapper lhs_; + RhsMapper rhs_; + Scalar* const buffer_; + OutputMapper output_; + OutputKernelType output_kernel_; + TensorContractionParams tensor_contraction_params_; + const int num_threads_; + const bool shard_by_col_; + const bool parallel_pack_; + const bool parallelize_by_sharding_dim_only_; + // Matrix sizes. + const Index m_; + const Index n_; + const Index k_; + // Block sizes. + const Index bm_; + const Index bn_; + const Index bk_; + // Number of tasks. 
+ const Index nm_; + const Index nn_; + const Index nk_; + // Task grain sizes (number of kernels executed per task). + const Index gm_; + const Index gn_; + // Number of blocks (this is different from ni_/nn_ because of task size + // coarsening). + const Index nm0_; + const Index nn0_; + // Tensor contraction kernel. + TensorContractionKernel kernel_; + + // Parallelization strategy. + // + // Blocks related to the same k block can run in parallel because they write + // to different output blocks. So we parallelize within k slices, this + // gives us parallelism level of m x n. Before we can start any kernels + // related to k-th slice, we need to issue m lhs packing tasks and n rhs + // packing tasks. + // + // However, there is a bottleneck when we are finishing kernels for k-th + // slice (at the very end there is only 1 runnable kernel). To mitigate this + // bottleneck we allow kernels from k-th and k+1-th slices to run in + // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same + // output block, so they must not run in parallel. + // + // This gives us the following dependency graph. + // On each k slice we have m x n kernel tasks, m lhs paking tasks and n rhs + // packing tasks. + // Kernel (m, n, k) can start when: + // - kernel (m, n, k-1) has finished + // - lhs packing (m, k) has finished + // - rhs packing (n, k) has finished + // Lhs/rhs packing can start when: + // - all k-1 packing has finished (artificially imposed to limit amount of + // parallel packing) + // + // On top of that we limit runnable tasks to two consecutive k slices. + // This is done to limit amount of memory we need for packed lhs/rhs + // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_). + // + // state_switch_ tracks when we are ready to switch to the next k slice. + // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n). + // These variable are rolling over 3 consecutive k slices: first two we are + // actively executing + one to track completion of kernels in the second + // slice. + static const Index P = 3; + + // Handle to the allocated temporary storage for Lhs/Rhs blocks. + BlockMemHandle packed_mem_; + std::vector packed_lhs_[P - 1]; + std::vector packed_rhs_[P - 1]; + + // If we choose to parallelize only by the sharding dimension, each thread + // will have it's own "thead local" (not a c++ thread local storage) memory + // for packed_lhs or packed_rhs (shard_by_col = false of true). This memory + // can't be passed to a kernel that might execute on a different thread. + // + // In practice when we are ready to pack memory for the sharding dimension + // (rhs if shard_by_col==true) of the K-th slice, all kernels for K-1 slice + // already computed (99% of the time), and we can pack data into the thread + // local storage, and guarantee that all the kernels will be executed + // immediately in the same thread. This significantly increases L1 cache hit + // ratio and reduces pressure on the memory bus. + // + // It's still possible that kernel for the K-th slice will be ready before + // completion of the K-1 kernel, so we have to allocate "global" packed_lhs_ + // and packed_rhs_ to allow kernels to be executed later on a thread + // different from the thread that was used for packing. + + // Handle for pre-allocated thread local memory buffers. 
+ BlockMemHandle thread_local_pre_alocated_mem_; + + // Only one of these will be initialized depending on shard_by_col value + // (the size will be `num_worker_threads * num_grains_in_the_sharding_dim`). + std::vector lhs_thread_local_pre_allocated_; + std::vector rhs_thread_local_pre_allocated_; + + // How many thread local blocks were already allocated. + std::atomic num_thread_local_allocations_; + const int thread_local_capacity; + + // We will use pre-allocated Lhs/Rhs blocks defined above, if the number of + // unique threads in a system is below or equal to the number of threads in + // a thread pool. We will fallback on dynamic memory allocation after that. + + // ThreadLocalBlocks is a container for Lhs or Rhs thread local buffers. Its + // size is equal to the grain size in Lhs/Rhs sharding dimension. + template + class ThreadLocalBlocks { + public: + ThreadLocalBlocks() = default; + + ThreadLocalBlocks(BlockType* base, size_t grain_size) + : is_pre_allocated_(true), + thread_local_pre_allocated_base_(base), + grain_size_(grain_size) {} + + ThreadLocalBlocks(BlockMemHandle mem_handle, + std::vector blocks) + : is_pre_allocated_(false), + mem_handle_(std::move(mem_handle)), + blocks_(std::move(blocks)) {} + + BlockType& block(int grain_index) { + eigen_assert(grain_index >= 0); + eigen_assert(static_cast(grain_index) < size()); + return is_pre_allocated_ ? thread_local_pre_allocated_base_[grain_index] + : blocks_[grain_index]; + } + + void Release(EvalParallelContext& ctx) const { + if (!is_pre_allocated_) { + ctx.kernel_.deallocate(ctx.device_, mem_handle_); + } + } + + size_t size() const { + return is_pre_allocated_ ? grain_size_ : blocks_.size(); + } + + private: + bool is_pre_allocated_; + + // Reuse pre-allocated thread local buffers. + BlockType* thread_local_pre_allocated_base_ = nullptr; + size_t grain_size_ = 0; + + // These will be initialized only if `is_pre_allocated == false`. + BlockMemHandle mem_handle_{}; + std::vector blocks_; + }; + + // ThreadLocalBlocksInitialize callable does custom thread local blocks + // initialization, and will reuse pre-allocated buffers if possible, or will + // dynamically allocate new memory. + // + // Lhs/Rhs blocks might be of the same type, so we have to pass explicitly + // for what side do we plan to do block allocation. + template + class ThreadLocalBlocksInitialize { + static constexpr bool kIsLhs = + !is_rhs && std::is_same::value; + static const bool kIsRhs = + is_rhs && std::is_same::value; + static_assert(kIsLhs || kIsRhs, "Unkown block type"); + + using Blocks = ThreadLocalBlocks; + + public: + ThreadLocalBlocksInitialize(EvalParallelContext& ctx) + : ctx_(ctx), + num_worker_threads_(ctx_.device_.numThreadsInPool()) {} + + void operator()(Blocks& blocks) { + const int n = ctx_.num_thread_local_allocations_.fetch_add( + 1, std::memory_order_relaxed); + + if (n >= num_worker_threads_) { + ThreadLocalBlocksAllocator::allocate(ctx_, blocks); + } else { + ThreadLocalBlocksAllocator::reuse(ctx_, n, blocks); + } + } + + private: + // NOTE(ezhulenev): Without 'if constexpr' we have to put calls to + // TensorContractionKernel::allocateSlices into template specializations. + // Also explicit specializations are not allowed at class scope in C++03, + // EvalCtx type parameter is just a workaround for that limitation. 
+ template + struct ThreadLocalBlocksAllocator; + + template + struct ThreadLocalBlocksAllocator { + static void allocate(EvalCtx& ctx, Blocks& blocks) { + std::vector rhs_blocks; + BlockMemHandle mem_handle = ctx.kernel_.allocateSlices( + ctx.device_, + /*num_lhs=*/0, + /*num_rhs=*/ctx.gn_, + /*num_slices=*/1, + /*lhs_blocks=*/nullptr, /*rhs_blocks=*/&rhs_blocks); + + blocks = ThreadLocalBlocks(std::move(mem_handle), + std::move(rhs_blocks)); + } + + static void reuse(EvalCtx& ctx, int index, Blocks& blocks) { + RhsBlock* ptr = &ctx.rhs_thread_local_pre_allocated_[ctx.gn_ * index]; + blocks = ThreadLocalBlocks(ptr, ctx.gn_); + } + }; + + template + struct ThreadLocalBlocksAllocator { + static void allocate(EvalCtx& ctx, Blocks& blocks) { + std::vector lhs_blocks; + BlockMemHandle mem_handle = ctx.kernel_.allocateSlices( + ctx.device_, + /*num_lhs=*/ctx.gm_, + /*num_rhs=*/0, + /*num_slices=*/1, + /*lhs_blocks=*/&lhs_blocks, /*rhs_blocks=*/nullptr); + + blocks = ThreadLocalBlocks(std::move(mem_handle), + std::move(lhs_blocks)); + } + + static void reuse(EvalCtx& ctx, int index, Blocks& blocks) { + LhsBlock* ptr = &ctx.lhs_thread_local_pre_allocated_[ctx.gm_ * index]; + blocks = ThreadLocalBlocks(ptr, ctx.gm_); + } + }; + + EvalParallelContext& ctx_; + const int num_worker_threads_; + }; + + template + class ThreadLocalBlocksRelease { + public: + using Blocks = ThreadLocalBlocks; + ThreadLocalBlocksRelease(EvalParallelContext& ctx) : ctx_(ctx) {} + void operator()(Blocks& blocks) { blocks.Release(ctx_); } + + private: + EvalParallelContext& ctx_; + }; + + // ThreadLocalBlocks initialization callables. + using ThreadLocalLhsInit = + ThreadLocalBlocksInitialize; + using ThreadLocalRhsInit = + ThreadLocalBlocksInitialize; + + // ThreadLocalBlocks release callables. + using ThreadLocalLhsRelease = ThreadLocalBlocksRelease; + using ThreadLocalRhsRelease = ThreadLocalBlocksRelease; + + // Thread local containers for Lhs/Rhs block packs. In practice only one of + // them will be used, depending on the shard_by_col value. + Eigen::ThreadLocal, ThreadLocalLhsInit, + ThreadLocalLhsRelease> + lhs_thread_local_blocks_; + Eigen::ThreadLocal, ThreadLocalRhsInit, + ThreadLocalRhsRelease> + rhs_thread_local_blocks_; + + // After a particular shard for Kth slice missed thread local execution + // opportunity (K-1 slice didn't complete kernels execution), we can no + // longer schedule K+1 and following slices in thread local mode, because + // there is no more guarantee that previous kernels were executed + // sequentially in the same thread (size is nn_ or nm_). + std::atomic* can_use_thread_local_packed_; + + std::atomic** state_kernel_[P]; + // state_switch_ is frequently modified by worker threads, while other + // fields are read-only after constructor. Let's move it to a separate cache + // line to reduce cache-coherency traffic. + char pad_[128]; + std::atomic state_packing_ready_[P]; + std::atomic state_switch_[P]; + + LhsBlock& packed_lhs(Index m, Index k, Index m1, bool use_thread_local) { + if (use_thread_local) { + eigen_assert(!shard_by_col_); + ThreadLocalBlocks& blocks = lhs_thread_local_blocks_.local(); + + Index grain_index = m1 - m * gm_; + return blocks.block(internal::convert_index(grain_index)); // FIXME better make ThreadLocalBlocks use Eigen::Index? 
+ } else { + return packed_lhs_[k % (P - 1)][m1]; + } + } + + RhsBlock& packed_rhs(Index n, Index k, Index n1, bool use_thread_local) { + if (use_thread_local) { + eigen_assert(shard_by_col_); + ThreadLocalBlocks& blocks = rhs_thread_local_blocks_.local(); + + Index grain_index = n1 - n * gn_; + return blocks.block(internal::convert_index(grain_index)); // FIXME better make ThreadLocalBlocks use Eigen::Index? + } else { + return packed_rhs_[k % (P - 1)][n1]; + } + } + + // In following two methods (pack_lhs and pack_rhs), if we know for sure + // that we'll be able to immediately call a kernel with packed data, and do + // not submit it to the thread pool, we can use thread local memory for + // packed data. + // + // We can only reliably check it if we are running all kernels in sync mode + // (parallelize only by sharding dim). If kernel for m==0 (n==0) is ready to + // run, it's guaranteed that all kernels with larger values of m (n) are + // also ready, because we execute them in the same order for all K slices. + + void pack_lhs(Index m, Index k) { + bool use_thread_local = false; + + if (parallelize_by_sharding_dim_only_ && !shard_by_col_ && + can_use_thread_local_packed_[m].load(std::memory_order_relaxed)) { + if (state_kernel_[k % P][m][0].load(std::memory_order_relaxed) == 1) { + use_thread_local = true; + } else { + // If we can't guarantee that all kernels in `k` slice will be + // executed sequentially in current thread, it's no longer safe to use + // thread local memory in following slices along the k dimensions. + eigen_assert(k > 0); + can_use_thread_local_packed_[m].store(false, + std::memory_order_relaxed); + } + } + + const Index mend = m * gm_ + gm(m); + for (Index m1 = m * gm_; m1 < mend; m1++) + kernel_.packLhs(&packed_lhs(m, k, m1, use_thread_local), + lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1)); + + if (!parallel_pack_ && shard_by_col_) { + assert(!use_thread_local); + signal_packing(k); + } else { + signal_switch(k + 1); + for (Index n = nn_ - 1; n >= 0; n--) { + bool sync = parallelize_by_sharding_dim_only_ || n == 0; + signal_kernel(m, n, k, sync, use_thread_local); + } + } + } + + void pack_rhs(Index n, Index k) { + bool use_thread_local = false; + + if (parallelize_by_sharding_dim_only_ && shard_by_col_ && + can_use_thread_local_packed_[n].load(std::memory_order_relaxed)) { + if (state_kernel_[k % P][0][n].load(std::memory_order_relaxed) == 1) { + use_thread_local = true; + } else { + // If we can't guarantee that all kernels in `k` slice will be + // executed sequentially in current thread, it's no longer safe to use + // thread local memory in followig slices along the k dimensions. + eigen_assert(k > 0); + can_use_thread_local_packed_[n].store(false, + std::memory_order_relaxed); + } + } + + const Index nend = n * gn_ + gn(n); + for (Index n1 = n * gn_; n1 < nend; n1++) { + if (!TensorContractionKernel::HasBeta && k == 0) { + // Zero the output memory in parallel, only if contraction kernel does + // not support `beta`. Otherwise we will pass beta 0.0 to the first + // call to the `TensorContractionKernel::invoke()`. + // + // On 10000x2x10000 mm zeroing can easily take half of time. Zero (bn + // x m) row. Safe to do here because all kernels that will write to + // this memory depend on completion of this task. Note: don't call + // device_.memset() here. device_.memset() blocks on thread pool + // worker thread, which can lead to underutilization and deadlocks. 
+ memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar)); + } + kernel_.packRhs(&packed_rhs(n, k, n1, use_thread_local), + rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1)); + } + + if (parallel_pack_ || shard_by_col_) { + signal_switch(k + 1); + for (Index m = nm_ - 1; m >= 0; m--) { + bool sync = parallelize_by_sharding_dim_only_ || m == 0; + signal_kernel(m, n, k, sync, use_thread_local); + } + } else { + assert(!use_thread_local); + signal_packing(k); + } + } + + void kernel(Index m, Index n, Index k, bool use_thread_local) { + // Note: order of iteration matters here. Iteration over m is innermost + // because we want to reuse the same packed rhs in consecutive tasks + // (rhs fits into L2$ while lhs only into L3$). + const Index nend = n * gn_ + gn(n); + const Index mend = m * gm_ + gm(m); + + // NOTE: output = alpha * LHS * RHS + beta * output. + const Scalar alpha = Scalar(1); + const Scalar beta = + (TensorContractionKernel::HasBeta && k == 0) ? Scalar(0) : Scalar(1); + + if (shard_by_col_) { + for (Index n1 = n * gn_; n1 < nend; n1++) { + for (Index m1 = m * gm_; m1 < mend; m1++) { + const auto output_mapper = output_.getSubMapper(m1 * bm_, n1 * bn_); + kernel_.invoke( + output_mapper, + packed_lhs(m, k, m1, !shard_by_col_ && use_thread_local), + packed_rhs(n, k, n1, shard_by_col_ && use_thread_local), bm(m1), + bk(k), bn(n1), alpha, beta); + + // We are done with the last task for the [m1, n1] block. + if (k + 1 == nk_) { + output_kernel_(output_mapper, tensor_contraction_params_, + m1 * bm_, n1 * bn_, bm(m1), bn(n1)); + } + } + } + } else { + for (Index m1 = m * gm_; m1 < mend; m1++) + for (Index n1 = n * gn_; n1 < nend; n1++) { + const auto output_mapper = output_.getSubMapper(m1 * bm_, n1 * bn_); + kernel_.invoke( + output_mapper, + packed_lhs(m, k, m1, !shard_by_col_ && use_thread_local), + packed_rhs(n, k, n1, shard_by_col_ && use_thread_local), bm(m1), + bk(k), bn(n1), alpha, beta); + + // We are done with the last task for the [m1, n1] block. + if (k + 1 == nk_) { + output_kernel_(output_mapper, tensor_contraction_params_, + m1 * bm_, n1 * bn_, bm(m1), bn(n1)); + } + } + } + signal_kernel(m, n, k + 1, /*sync=*/false, /*use_thread_local=*/false); + signal_switch(k + 2); + } + + void signal_packing(Index k) { + eigen_assert(!parallel_pack_); + Index s = state_packing_ready_[k % P].fetch_sub(1); + eigen_assert(s > 0); + if (s != 1) return; + state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_; + enqueue_packing(k, shard_by_col_); + } + + void signal_kernel(Index m, Index n, Index k, bool sync, + bool use_thread_local) { + std::atomic* state = &state_kernel_[k % P][m][n]; + Index s = state->load(); + eigen_assert(s > 0); + if (s != 1 && state->fetch_sub(1) != 1) { + eigen_assert(!use_thread_local); + return; + } + state->store(parallel_pack_ ? 3 : 2, std::memory_order_relaxed); + if (sync) { + kernel(m, n, k, use_thread_local); + } else { + eigen_assert(!use_thread_local); + device_.enqueueNoNotification( + [=]() { kernel(m, n, k, use_thread_local); }); + } + } + + void signal_switch(Index k, Index v = 1) { + Index s = state_switch_[k % P].fetch_sub(v); + eigen_assert(s >= v); + if (s != v) return; + + // Ready to switch to the next k slice. + // Reset counter for the next iteration. + state_switch_[k % P] = + (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) + + nm_ * nn_; + if (k < nk_) { + // Issue lhs/rhs packing. Their completion will in turn kick off + // kernels. 
+ if (parallel_pack_) { + enqueue_packing(k, !shard_by_col_); + enqueue_packing(k, shard_by_col_); + } else if (shard_by_col_) { + enqueue_packing(k, false); + } else { + enqueue_packing(k, true); + } + + // Termination handling. + // Because kernel completion signals k + 2 switch, we need to finish nk + // + 2 slices without issuing any tasks on nk + 1 slice. So here we + // pretend that all nk + 1 packing tasks just finish instantly; so that + // nk + 2 switch only waits for completion of nk kernels. + } else if (k == nk_) { + signal_switch(k + 1, + parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)); + } else { + done_.Notify(); + } + } + + // Enqueue all rhs/lhs packing for k-th slice. + void enqueue_packing(Index k, bool rhs) { + enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs); + } + + void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { + if (end - start == 1) { + if (rhs) + pack_rhs(start, k); + else + pack_lhs(start, k); + } else { + while (end - start > 1) { + Index mid = (start + end) / 2; + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(mid, end, k, rhs); }); + end = mid; + } + + // Decide if we want to run first packing task (start == 0) in + // async mode if we parallelize only by sharding dim: + // (1) pack_lhs and pack_rhs call signal_switch before completing + // all calls to signal_kernel, which in sync mode might lead + // to the execution of the first kernel of the k+1 slice, before + // completing a call to the last kernel of the k slice. + // (2) all pack tasks for sharded dim must be executed in a thread + // pool to get pre-allocated thead local buffers. + bool pack_async = + (start == 0) && + (parallelize_by_sharding_dim_only_&& shard_by_col_ == rhs) && + (k > 0 || std::this_thread::get_id() == created_by_thread_id_); + + if (pack_async) { + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(start, end, k, rhs); }); + } else { + enqueue_packing_helper(start, end, k, rhs); + } + } + } + + // Block sizes with accounting for potentially incomplete last block. + Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; } + Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; } + Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; } + // Task grain sizes accounting for potentially incomplete last task. + Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; } + Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; } + + EvalParallelContext(const EvalParallelContext&) = delete; + void operator=(const EvalParallelContext&) = delete; + }; + + template + using SyncEvalParallelContext = + EvalParallelContext; + + // ------------------------------------------------------------------------ // + + // EvalShardedByInnerDimContext orchestrates sync/async contraction + // evaluation, when we shard by inner dimension. When it is executed in + // asynchronous mode, it owns all the shared state that might be accessible by + // block processing tasks. 
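+  //
+  // [Editor's note: illustrative sketch, not upstream code] The inner-dimension
+  // sharding below splits k into num_blocks = divup(k, block_size) partial GEMMs;
+  // block 0 accumulates directly into the result buffer, every other block gets
+  // its own m*n scratch buffer, and the partial sums are added together at the
+  // end before the output kernel runs. Usage, as in evalProductImpl above:
+  //
+  //   EvalShardedByInnerDimContext<DoneCallback> ctx(
+  //       this, num_threads_by_k, buffer, m, n, k, std::move(done));
+  //   ctx.template run<Alignment>();      // sync: barrier, aggregate, output kernel
+  //   // or ctx->template runAsync<Alignment>() on a heap-allocated context (async)
+  //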
+ + template + struct EvalShardedByInnerDimContext { + EvalShardedByInnerDimContext(const Self* self, int num_threads, + Scalar* result_buffer, + Index m_size, Index n_size, Index k_size, + DoneCallback done_callback) + : evaluator(self), + m_lhs_inner_dim_contiguous(evaluator->m_lhs_inner_dim_contiguous), + m_rhs_inner_dim_contiguous(evaluator->m_rhs_inner_dim_contiguous), + m_rhs_inner_dim_reordered(evaluator->m_rhs_inner_dim_reordered), + result(result_buffer), + m(m_size), + n(n_size), + k(k_size), + done(std::move(done_callback)), + buffer_size_bytes(m * n * sizeof(Scalar)), + block_size(blockSize(k, num_threads)), + num_blocks(divup(k, block_size)), + num_pending_blocks(internal::convert_index(num_blocks)), + l0_ranges(divup(num_blocks, l0_size)), + l0_state(l0_ranges), + block_buffers(num_blocks) { + // Keep count of pending gemm tasks for each l0 range. + for (int i = 0; i < l0_ranges; ++i) { + const Index num_pending_tasks = actualRangeSize(l0_ranges, l0_size, i); + l0_state.emplace_back(internal::convert_index(num_pending_tasks)); + } + + // Allocate temporary buffers for each block. + for (Index block_idx = 0; block_idx < num_blocks; ++block_idx) { + Scalar* buf = block_idx == 0 + ? result + : static_cast(evaluator->m_device.allocate( + buffer_size_bytes)); + block_buffers.emplace_back(buf); + } + } + + ~EvalShardedByInnerDimContext() { + for (Index i = 1; i < num_blocks; ++i) { + evaluator->m_device.deallocate(block_buffers[i]); + } + } + + template + void run() { + Barrier barrier(internal::convert_index(num_blocks)); + eval(barrier, 0, num_blocks); + barrier.Wait(); + + // Aggregate partial sums from l0 ranges. + aggregateL0Blocks(); + + // Apply output kernel. + applyOutputKernel(); + } + + template + void runAsync() { + evalAsync(0, num_blocks); + } + + private: + // The underlying GEMM kernel assumes that k is a multiple of + // the packet size and subtle breakage occurs if this is violated. + static const Index packet_size = internal::packet_traits::size; + + const Self* evaluator; // TensorContraction evaluator + + // These fields required fromTENSOR_CONTRACTION_DISPATCH macro. + bool m_lhs_inner_dim_contiguous; + bool m_rhs_inner_dim_contiguous; + bool m_rhs_inner_dim_reordered; + + Scalar* result; + + Index m; + Index n; + Index k; + + DoneCallback done; + + // ----------------------------------------------------------------------// + // Algorithm parameters. + + // We will compute partial results into the buffers of this size. + Index buffer_size_bytes; + + Index block_size; + Index num_blocks; + + // Keep track of pending tasks when evaluate in async mode. + std::atomic num_pending_blocks; + + // We compute partial gemm results in parallel, and to get the final result + // we need to add them all together. For the large number of threads (>= 48) + // this adds a very expensive sequential step at the end. + // + // We split the [0, num_blocks) into small ranges, and when a task for the + // block finishes its partial gemm computation, it checks if it was the last + // gemm in the range, and if so, it will add all blocks of the range. + // + // After all tasks done, we need to add only these pre-aggregated blocks. + + // For now we use just a single level of ranges to compute pre-aggregated + // partial sums, but in general we can use more layers to compute tree + // aggregation in parallel and reduce the size of the sequential step. + // + // TODO(ezhulenev): Add multilevel tree aggregation? Probably will make + // sense only if number of threads >= ~128? 
+ static const Index l0_size = 4; + Index l0_ranges; + + // Keep count of pending gemm tasks for each l0 range. + MaxSizeVector> l0_state; // [0, l0_ranges) + + // Buffers allocated for each temporary block computation. + MaxSizeVector block_buffers; // [0, num_blocks) + + template + void processBlock(Index block_idx, Index begin, Index end) { + Scalar* buf = block_buffers[block_idx]; + + TENSOR_CONTRACTION_DISPATCH( + evaluator->template evalGemmPartialWithoutOutputKernel, Alignment, + (buf, begin, end, + /*num_threads=*/internal::convert_index(num_blocks))); + + // Check if it was the last task in l0 range. + const Index l0_index = block_idx / l0_size; + const int v = l0_state[l0_index].fetch_sub(1); + eigen_assert(v >= 1); + + // If we processed the last block of the range, we can aggregate all + // partial results into the first block of the range. + if (v == 1) { + const Index rng_size = actualRangeSize(l0_ranges, l0_size, l0_index); + const Index dst_block_idx = l0_index * l0_size; + + if (rng_size == l0_size) { + addAllToBuffer( + m * n, + /*src_buf0=*/block_buffers[dst_block_idx + 1], + /*src_buf1=*/block_buffers[dst_block_idx + 2], + /*src_buf2=*/block_buffers[dst_block_idx + 3], + /*dst_buf= */ block_buffers[dst_block_idx]); + } else { + // Aggregate blocks of potentially incomplete last range. + for (int i = 1; i < rng_size; ++i) { + addToBuffer(m * n, + /*src_buf=*/block_buffers[dst_block_idx + i], + /*dst_buf=*/block_buffers[dst_block_idx]); + } + } + } + } + + // Aggregate partial sums from l0 ranges. + template + void aggregateL0Blocks() const { + Index l0_index = 1; + + for (; l0_index + 2 < l0_ranges; l0_index += 3) { + addAllToBuffer( + m * n, + /*src_buf0=*/block_buffers[(l0_index + 0) * l0_size], + /*src_buf1=*/block_buffers[(l0_index + 1) * l0_size], + /*src_buf2=*/block_buffers[(l0_index + 2) * l0_size], + /*dst_buf= */ block_buffers[0]); + } + + for (; l0_index < l0_ranges; ++l0_index) { + addToBuffer(m * n, block_buffers[l0_index * l0_size], + block_buffers[0]); + } + } + + void applyOutputKernel() const { + typedef internal::blas_data_mapper OutputMapper; + evaluator->m_output_kernel( + OutputMapper(result, m), evaluator->m_tensor_contraction_params, + static_cast(0), static_cast(0), m, n); + } + + // Compute block size with accounting for potentially incomplete last block. + Index actualBlockSize(Index block_idx) const { + return block_idx + 1 < num_blocks + ? block_size + : k + block_size - block_size * num_blocks; + }; + + // Compute range size with accounting for potentially incomplete last range. + Index actualRangeSize(Index num_ranges, Index range_size, + Index range_idx) const { + eigen_assert(range_idx < num_ranges); + return range_idx + 1 < num_ranges + ? 
range_size + : num_blocks + range_size - range_size * num_ranges; + }; + + template + EIGEN_STRONG_INLINE static void addToBuffer(size_t n, const Scalar* src_buf, + Scalar* tgt_buf) { + const int output_packet_size = + internal::unpacket_traits::size; + size_t i = 0; + const size_t num_packets = n / output_packet_size; + for (; i < output_packet_size * num_packets; i += output_packet_size) { + const PacketReturnType src_val = + internal::pload(src_buf + i); + const PacketReturnType tgt_val = + internal::ploadt(tgt_buf + i); + const PacketReturnType sum = internal::padd(src_val, tgt_val); + internal::pstoret(tgt_buf + i, + sum); + } + for (; i < n; ++i) { + tgt_buf[i] += src_buf[i]; + } + } + + template + EIGEN_STRONG_INLINE static void addAllToBuffer(size_t n, + const Scalar* src_buf0, + const Scalar* src_buf1, + const Scalar* src_buf2, + Scalar* dst_buf) { + using ::Eigen::internal::padd; + using ::Eigen::internal::pload; + using ::Eigen::internal::ploadt; + using ::Eigen::internal::pstoret; + + const int output_packet_size = + internal::unpacket_traits::size; + + size_t i = 0; + const size_t num_packets = n / output_packet_size; + for (; i < output_packet_size * num_packets; i += output_packet_size) { + const auto src_val0 = pload(src_buf0 + i); + const auto src_val1 = pload(src_buf1 + i); + const auto src_val2 = pload(src_buf2 + i); + + const auto dst_val = ploadt(dst_buf + i); + const auto sum = + padd(padd(dst_val, src_val0), padd(src_val1, src_val2)); + + pstoret(dst_buf + i, sum); + } + for (; i < n; ++i) { + dst_buf[i] += src_buf0[i] + src_buf1[i] + src_buf2[i]; + } + } + + template + void eval(Barrier& barrier, Index start_block_idx, Index end_block_idx) { + while (end_block_idx - start_block_idx > 1) { + Index mid_block_idx = (start_block_idx + end_block_idx) / 2; + evaluator->m_device.enqueueNoNotification( + [this, &barrier, mid_block_idx, end_block_idx]() { + eval(barrier, mid_block_idx, end_block_idx); + }); + end_block_idx = mid_block_idx; + } + + Index block_idx = start_block_idx; + Index block_start = block_idx * block_size; + Index block_end = block_start + actualBlockSize(block_idx); + + processBlock(block_idx, block_start, block_end); + barrier.Notify(); + } + + template + void evalAsync(Index start_block_idx, Index end_block_idx) { + while (end_block_idx - start_block_idx > 1) { + Index mid_block_idx = (start_block_idx + end_block_idx) / 2; + evaluator->m_device.enqueueNoNotification( + [this, mid_block_idx, end_block_idx]() { + evalAsync(mid_block_idx, end_block_idx); + }); + end_block_idx = mid_block_idx; + } + + Index block_idx = start_block_idx; + + Index block_start = block_idx * block_size; + Index block_end = block_start + actualBlockSize(block_idx); + + processBlock(block_idx, block_start, block_end); + + int v = num_pending_blocks.fetch_sub(1); + eigen_assert(v >= 1); + + if (v == 1) { + // Aggregate partial sums from l0 ranges. + aggregateL0Blocks(); + + // Apply output kernel. + applyOutputKernel(); + + // NOTE: If we call `done` callback before deleting this (context), + // it might deallocate Self* pointer captured by context, and we'll + // fail in destructor trying to deallocate temporary buffers. + + // Move done call back from context before it will be destructed. + DoneCallback done_copy = std::move(done); + + // We are confident that we are the last one who touches context. + delete this; + + // Now safely call the done callback. 
+ done_copy(); + } + } + + // Cost model doesn't capture well the cost associated with constructing + // tensor contraction mappers and computing loop bounds in gemm_pack_lhs + // and gemm_pack_rhs, so we specify minimum desired block size. + static Index blockSize(Index k, int num_threads) { + const auto round_up = [=](Index index) -> Index { + const Index kmultiple = packet_size <= 8 ? 8 : packet_size; + return divup(index, kmultiple) * kmultiple; + }; + + const Index target_block_size = round_up(divup(k, num_threads)); + const Index desired_min_block_size = 12 * packet_size; + + return numext::mini( + k, numext::maxi(desired_min_block_size, target_block_size)); + } + + EvalShardedByInnerDimContext(const EvalShardedByInnerDimContext&) = delete; + void operator=(const EvalShardedByInnerDimContext&) = delete; + }; + + // ------------------------------------------------------------------------ // + + // Below are the function used by evalProductImpl heuristics, trying to select + // optimcal parameters for parallelization algorithm. + + // Decide whether we want to shard m x n contraction by columns or by rows. + static bool shardByCol(Index m, Index n, Index num_threads) { + // Note: we are comparing both n and m against Traits::nr, it is not + // a mistake. We are trying to figure out how both n and m will fit into + // the main sharding dimension. + + // Sharding by column is the default + // ... unless there is enough data for vectorization over rows + if (m / num_threads >= Traits::nr && + // and not enough data for vectorization over columns + (n / num_threads < Traits::nr || + // ... or barely enough data for vectorization over columns, + // but it is not evenly dividable across threads + (n / num_threads < 4 * Traits::nr && + (n % (num_threads * Traits::nr)) != 0 && + // ... and it is evenly dividable across threads for rows + ((m % (num_threads * Traits::nr)) == 0 || + // .. or it is not evenly dividable for both dimensions but + // there is much more data over rows so that corner effects are + // mitigated. + (m / n >= 6))))) + return false; + // Wait, or if matrices are just substantially prolonged over the other + // dimension. + if (n / num_threads < 16 * Traits::nr && m > n * 32) return false; + return true; + } + + Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn, + int num_threads, bool shard_by_col) const { + Index gm = 1; + Index gm1 = 1; + Index nm0 = divup(m, bm); + Index nm1 = nm0; + for (;;) { + // Find the next candidate for m grain size. It needs to result in + // different number of blocks. E.g. if we have 10 kernels, we want to try + // 5 and 10, but not 6, 7, 8 and 9. + while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++; + if (gm1 > nm0) break; + // Check the candidate. + int res = checkGrain(m, n, bm, bn, bk, gm1, gn, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nm1 = divup(nm0, gm1); + if (res == 0) continue; + // Commit new grain size. 
+ gm = gm1; + } + return gm; + } + + Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + int num_threads, bool shard_by_col) const { + Index gn = 1; + Index gn1 = 1; + Index nn0 = divup(n, bn); + Index nn1 = nn0; + for (;;) { + while (gn1 <= nn0 && nn1 == divup(nn0, gn1)) gn1++; + if (gn1 > nn0) break; + int res = checkGrain(m, n, bm, bn, bk, gm, gn1, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nn1 = divup(nn0, gn1); + if (res == 0) continue; + gn = gn1; + } + return gn; + } + + // checkGrain checks whether grain (gm, gn) is suitable and is better than + // (oldgm, oldgn). + int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + Index gn, Index oldgm, Index oldgn, int num_threads, + bool shard_by_col) const { + const TensorOpCost cost = + contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true); + double taskSize = TensorCostModel::taskSize( + static_cast(bm) * gm * bn * gn, cost); + // If the task is too small, then we agree on it regardless of anything + // else. Otherwise synchronization overheads will dominate. + if (taskSize < 1) return 1; + // If it is too large, then we reject it and all larger tasks. + if (taskSize > 2) return -1; + // Now we are in presumably good task size range. + // The main deciding factor here is parallelism. Consider that we have 12 + // kernels and 4 threads. Grains of 2, 3 and 4 all yield good task sizes. + // But 2/4 yield 6/3 tasks, which gives us parallelism of 0.75 (at most 3/4 + // of cores will be busy). While grain size 3 gives us 4 tasks, which gives + // us parallelism of 1 (we can load all cores). + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index new_tasks = divup(nm0, gm) * divup(nn0, gn); + double new_parallelism = static_cast(new_tasks) / + (divup(new_tasks, num_threads) * num_threads); + Index old_tasks = divup(nm0, oldgm) * divup(nn0, oldgn); + double old_parallelism = static_cast(old_tasks) / + (divup(old_tasks, num_threads) * num_threads); + if (new_parallelism > old_parallelism || new_parallelism == 1) return 1; + return 0; + } + + TensorOpCost contractionCost(Index m, Index n, Index bm, Index bn, Index bk, + bool shard_by_col, bool prepacked) const { + const int packed_size = std::min(PacketType::size, + PacketType::size); + const int output_packet_size = internal::unpacket_traits::size; + const double kd = static_cast(bk); + double compute_bandwidth = computeBandwidth(false, bm, bn, bk); + // Computations. + TensorOpCost cost = TensorOpCost(0, 0, kd * compute_bandwidth, true, packed_size); + // Output stores. + cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); + if (prepacked) { + // Packing and kernels are executed in different tasks. When we calculate + // task grain size we look only at kernel cost assuming that kernel + // is more expensive than packing. + return cost; + } + // Lhs/rhs loads + computations. + TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n); + TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m); + // Lhs packing memory cost does not contribute considerably to overall + // execution time because lhs is prefetched early and accessed sequentially. + if (shard_by_col) + lhsCost.dropMemoryCost(); + else + rhsCost.dropMemoryCost(); + return cost + lhsCost + rhsCost; + } + + // Decide whether we want to shard m x k x n contraction over the inner + // (contraction) dimension (k). 
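+  // Illustrative scenario (numbers are only an example): a 64 x 64 x 10M
+  // contraction on, say, 16 threads leaves almost no parallelism over the
+  // outer dimensions (m / num_threads and n / num_threads fall below
+  // Traits::nr), while k has plenty of work, so sharding over k is the only
+  // way to keep all threads busy, at the price of an m x n accumulation
+  // buffer per k-shard (which is what the l3CacheSize() check below limits).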
+ static bool shardByInnerDim(Index m, Index n, Index k, int num_threads, + int num_threads_by_k) { + std::ptrdiff_t bufsize = m * n * sizeof(Scalar); + bool shard_by_k = false; + if (n == 1 || // If mat*vec or... + num_threads_by_k < 2 || // running single threaded or... + num_threads_by_k < + num_threads || // sharding by k gives less parallelism or... + bufsize > l3CacheSize() / num_threads_by_k || // need more buffer space + // than L3 cache or... + k / num_threads_by_k < 2 * Traits::nr) { // k per thread is tiny. + shard_by_k = false; + } else if (numext::maxi(m, n) / num_threads < + Traits::nr || // both other dimensions are tiny or... + // k per thread is not small and... + (k / num_threads_by_k > 8 * Traits::nr && + // one of the outer dimensions is tiny or sharding by k offers + // more parallelism. + (numext::mini(m, n) < 2 * Traits::nr || + num_threads_by_k > num_threads))) { + shard_by_k = true; + } + return shard_by_k; + } + + TensorOpCost contractionCostPerInnerDim(Index m, Index n, Index k) const { + // Compute cost. + const int output_packet_size = internal::unpacket_traits::size; + TensorOpCost cost(0, 0, (computeBandwidth(true, m, n, k) * m) * n, true, output_packet_size); + // Output stores. + cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); + TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * m; + TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * n; + // Since the inner gemm kernel is always sharded by column, the lhs + // load cost is negligible. + lhsCost.dropMemoryCost(); + return cost + lhsCost + rhsCost; + } + + int numThreadsInnerDim(Index m, Index n, Index k) const { + const int output_packet_size = internal::unpacket_traits::size; + TensorOpCost cost = contractionCostPerInnerDim(m, n, k); + double total_parallel_cost = + TensorCostModel::totalCost(k, cost); + // Cost of reduction step accumulating the m*n per-thread buffers into the + // result. + double reduction_cost = TensorCostModel::totalCost( + m * n, TensorOpCost(2, 1, 1, true, output_packet_size)); + int num_threads = 1; + double min_cost = total_parallel_cost; + double kPerThreadOverHead = 3000; + double kFixedOverHead = 100000; + for (int nt = 2; nt <= this->m_device.numThreads(); nt += 2) { + double sequential_cost = + kFixedOverHead + nt * (reduction_cost + kPerThreadOverHead); + double parallel_cost = total_parallel_cost / nt + sequential_cost; + if (parallel_cost < min_cost) { + num_threads = nt; + min_cost = parallel_cost; + } + } + return num_threads; + } + + double computeBandwidth(bool shard_by_col, Index bm, Index bn, + Index bk) const { + // Peak VFMA bandwidth is 0.5. However if we have not enough data for + // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined + // experimentally. + double computeBandwidth = + bk == 1 ? 4.0 + : (shard_by_col ? bn : bm) < Traits::nr || + (shard_by_col ? bm : bn) < Traits::mr + ? 2.0 + : 0.5; +#ifndef EIGEN_VECTORIZE_FMA + // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors. + // However for MULPS/ADDPS we have dependent sequence of 2 such + // instructions, + // so overall bandwidth is 1.0. 
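+    // Hence, when FMA is unavailable, the 0.5 peak estimate above is doubled: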
+ if (computeBandwidth == 0.5) computeBandwidth = 1.0; +#endif + return computeBandwidth; + } + +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_THREADS +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h new file mode 100644 index 0000000..09d2da9 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -0,0 +1,456 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H + +namespace Eigen { + +/** \class TensorConversionOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor conversion class. This class makes it possible to vectorize + * type casting operations when the number of scalars per packet in the source + * and the destination type differ + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef TargetType Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; + typedef typename TypeConversion::PointerType>::type PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConversionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConversionOp type; +}; + +} // end namespace internal + + +template +struct PacketConverter; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + return internal::pcast(m_impl.template packet(index)); + } + + private: + const TensorEvaluator& m_impl; +}; + + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + SrcPacket src3 = m_impl.template packet(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet(index + 3 * SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2, src3, src4); + 
return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + 1 * SrcPacketSize); + SrcPacket src3 = m_impl.template packet(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet(index + 3 * SrcPacketSize); + SrcPacket src5 = m_impl.template packet(index + 4 * SrcPacketSize); + SrcPacket src6 = m_impl.template packet(index + 5 * SrcPacketSize); + SrcPacket src7 = m_impl.template packet(index + 6 * SrcPacketSize); + SrcPacket src8 = m_impl.template packet(index + 7 * SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2, src3, src4, src5, src6, src7, src8); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + // Only call m_impl.packet() when we have direct access to the underlying data. This + // ensures that we don't compute the subexpression twice. We may however load some + // coefficients twice, but in practice this doesn't negatively impact performance. + if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) { + // Force unaligned memory loads since we can't ensure alignment anymore + return internal::pcast(m_impl.template packet(index)); + } else { + const int TgtPacketSize = internal::unpacket_traits::size; + typedef typename internal::unpacket_traits::type SrcType; + typedef typename internal::unpacket_traits::type TgtType; + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::unpacket_traits::type values[TgtPacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < TgtPacketSize; ++i) { + values[i] = converter(m_impl.coeff(index+i)); + } + TgtPacket rslt = internal::pload(values); + return rslt; + } + } + + private: + const TensorEvaluator& m_impl; + const typename TensorEvaluator::Index m_maxIndex; +}; + +template +class TensorConversionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef Scalar CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) + : m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +template struct ConversionSubExprEval { + static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) { + impl.evalSubExprsIfNeeded(NULL); + return true; + } +}; + +template struct ConversionSubExprEval { + static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) { + return impl.evalSubExprsIfNeeded(data); + } +}; + +#ifdef EIGEN_USE_THREADS +template +struct ConversionSubExprEvalAsync { + static EIGEN_STRONG_INLINE void run(Eval& 
impl, EvalPointerType, EvalSubExprsCallback done) { + impl.evalSubExprsIfNeededAsync(nullptr, std::move(done)); + } +}; + +template +struct ConversionSubExprEvalAsync { + static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) { + impl.evalSubExprsIfNeededAsync(data, std::move(done)); + } +}; +#endif + +namespace internal { + +template +struct CoeffConv { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator& impl, Index index) { + internal::scalar_cast_op converter; + return converter(impl.coeff(index)); + } +}; + +template +struct CoeffConv { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator& impl, Index index) { + return impl.coeff(index); + } +}; + +template +struct PacketConv { + typedef typename internal::unpacket_traits::type SrcType; + typedef typename internal::unpacket_traits::type TargetType; + + static const int PacketSize = internal::unpacket_traits::size; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) { + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + TargetPacket rslt = internal::pload(values); + return rslt; + } +}; + +template +struct PacketConv { + typedef typename internal::unpacket_traits::type SrcType; + typedef typename internal::unpacket_traits::type TargetType; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits::TgtCoeffRatio; + PacketConverter, SrcPacket, TargetPacket, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet(index); + } +}; + +template +struct PacketConv { + typedef typename internal::unpacket_traits::type TargetType; + static const int PacketSize = internal::unpacket_traits::size; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i); + return internal::pload(values); + } +}; + +template +struct PacketConv { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) { + return impl.template packet(index); + } +}; + +} // namespace internal + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConversionOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef TargetType Scalar; + typedef TargetType CoeffReturnType; + typedef typename internal::remove_all::Scalar>::type SrcType; + typedef typename PacketType::type PacketReturnType; + typedef typename PacketType::type PacketSourceType; + static const int PacketSize = PacketType::size; + static const bool IsSameType = internal::is_same::value; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = + #ifndef EIGEN_USE_SYCL + true, + #else + TensorEvaluator::PacketAccess & + internal::type_casting_traits::VectorizedCast, + #endif + BlockAccess = 
TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + static const int NumDims = internal::array_size::value; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + struct TensorConversionOpBlockFactory { + template + struct XprType { + typedef TensorConversionOp type; + }; + + template + typename XprType::type expr(const ArgXprType& expr) const { + return typename XprType::type(expr); + } + }; + + typedef internal::TensorUnaryExprBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) + { + return ConversionSubExprEval, EvaluatorPointerType>::run(m_impl, data); + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType data, EvalSubExprsCallback done) { + ConversionSubExprEvalAsync, + EvaluatorPointerType, + EvalSubExprsCallback>::run(m_impl, data, std::move(done)); + } +#endif + + EIGEN_STRONG_INLINE void cleanup() + { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return internal::CoeffConv::run(m_impl,index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType + packet(Index index) const { + // If we are not going to do the cast, we just need to check that base + // TensorEvaluator has packet access. Otherwise we also need to make sure, + // that we have an implementation of vectorized cast. + const bool Vectorizable = + IsSameType + ? 
TensorEvaluator::PacketAccess + : int(TensorEvaluator::PacketAccess) & + int(internal::type_casting_traits::VectorizedCast); + + return internal::PacketConv::run(m_impl, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double cast_cost = TensorOpCost::CastCost(); + if (vectorized) { + const double SrcCoeffRatio = + internal::type_casting_traits::SrcCoeffRatio; + const double TgtCoeffRatio = + internal::type_casting_traits::TgtCoeffRatio; + return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) + + TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize)); + } else { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return m_impl.getResourceRequirements(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + return TensorBlock(m_impl.block(desc, scratch), + TensorConversionOpBlockFactory()); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator& impl() const { return m_impl; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + TensorEvaluator m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h new file mode 100644 index 0000000..b20f80b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -0,0 +1,1132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H + +namespace Eigen { + +/** \class TensorConvolution + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor convolution class. 
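+ *
+ * Informally, for every output coefficient the evaluator sums products of
+ * input and kernel coefficients over the kernel extent along the selected
+ * dimensions ("valid" convolution: each convolved dimension shrinks from
+ * input_dim to input_dim - kernel_dim + 1, as computed by the evaluators
+ * below).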
+ * + * + */ +namespace internal { + +template +class IndexMapper { + public: + IndexMapper(const InputDims& input_dims, const array& kernel_dims, + const array& indices) { + + array dimensions = input_dims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = indices[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + dimensions[index] = result_dim; + } + + array inputStrides; + array outputStrides; + if (static_cast(Layout) == static_cast(ColMajor)) { + inputStrides[0] = 1; + outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; + outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; + } + } else { + inputStrides[NumDims - 1] = 1; + outputStrides[NumDims - 1] = 1; + for (int i = static_cast(NumDims) - 2; i >= 0; --i) { + inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; + outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; + } + } + + array gpuInputDimensions; + array gpuOutputDimensions; + array tmp = dimensions; + array ordering; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = i + offset; + ordering[index] = indices[i]; + tmp[indices[i]] = -1; + gpuInputDimensions[index] = input_dims[indices[i]]; + gpuOutputDimensions[index] = dimensions[indices[i]]; + } + + int written = static_cast(Layout) == static_cast(ColMajor) + ? NumKernelDims + : 0; + for (int i = 0; i < NumDims; ++i) { + if (tmp[i] >= 0) { + ordering[written] = i; + gpuInputDimensions[written] = input_dims[i]; + gpuOutputDimensions[written] = dimensions[i]; + ++written; + } + } + + for (int i = 0; i < NumDims; ++i) { + m_inputStrides[i] = inputStrides[ordering[i]]; + m_outputStrides[i] = outputStrides[ordering[i]]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (i > NumKernelDims) { + m_gpuInputStrides[i] = + m_gpuInputStrides[i - 1] * gpuInputDimensions[i - 1]; + m_gpuOutputStrides[i] = + m_gpuOutputStrides[i - 1] * gpuOutputDimensions[i - 1]; + } else { + m_gpuInputStrides[i] = 1; + m_gpuOutputStrides[i] = 1; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (static_cast(i + 1) < offset) { + m_gpuInputStrides[i] = + m_gpuInputStrides[i + 1] * gpuInputDimensions[i + 1]; + m_gpuOutputStrides[i] = + m_gpuOutputStrides[i + 1] * gpuOutputDimensions[i + 1]; + } else { + m_gpuInputStrides[i] = 1; + m_gpuOutputStrides[i] = 1; + } + } + } + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputPlaneToTensorInputOffset(Index p) const { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_gpuInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_gpuInputStrides[d]; + } + inputIndex += p * m_inputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_gpuInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_gpuInputStrides[d]; + } + inputIndex += p * m_inputStrides[limit]; + } + return inputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputPlaneToTensorOutputOffset(Index p) const { + Index outputIndex = 0; + if (static_cast(Layout) == 
static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_gpuOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_gpuOutputStrides[d]; + } + outputIndex += p * m_outputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_gpuOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_gpuOutputStrides[d]; + } + outputIndex += p * m_outputStrides[limit]; + } + return outputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + + k * m_inputStrides[offset + 2]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + + k * m_outputStrides[offset + 2]; + } + + private: + static const int NumDims = internal::array_size::value; + array m_inputStrides; + array m_outputStrides; + array m_gpuInputStrides; + array m_gpuOutputStrides; +}; + + + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. 
+ typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename InputXprType::Nested LhsNested; + typedef typename KernelXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + typedef typename conditional::val, + typename traits::PointerType, typename traits::PointerType>::type PointerType; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConvolutionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConvolutionOp type; +}; + +} // end namespace internal + + + +template +class TensorConvolutionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) + : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + inputExpression() const { return m_input_xpr; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + kernelExpression() const { return m_kernel_xpr; } + + protected: + typename InputXprType::Nested m_input_xpr; + typename KernelXprType::Nested m_kernel_xpr; + const Indices m_indices; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = int(TensorEvaluator::IsAligned) & int(TensorEvaluator::IsAligned), + PacketAccess = int(TensorEvaluator::PacketAccess) & int(TensorEvaluator::PacketAccess), + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), 
m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; + } + } else { + m_inputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; + } + } + + m_dimensions = m_inputImpl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i > 0) { + m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; + } else { + m_kernelStride[0] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; + } + } else { + for (int i = NumKernelDims - 1; i >= 0; --i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i < NumKernelDims - 1) { + m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; + } else { + m_kernelStride[NumKernelDims - 1] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_inputImpl.evalSubExprsIfNeeded(NULL); + preloadKernel(); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_local_kernel) { + m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + void evalTo(typename XprType::Scalar* buffer) { + evalSubExprsIfNeeded(NULL); + for (int i = 0; i < dimensions().TotalSize(); ++i) { + buffer[i] += coeff(i); + } + cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + CoeffReturnType result = CoeffReturnType(0); + convolve(firstInput(index), 0, NumKernelDims-1, result); + return result; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const + { + Index indices[2] = {index, index+PacketSize-1}; + Index startInputs[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + 
startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } + startInputs[0] += indices[0]; + startInputs[1] += indices[1]; + + if (startInputs[1]-startInputs[0] == PacketSize-1) { + PacketReturnType result = internal::pset1(0); + convolvePacket(startInputs[0], 0, NumKernelDims-1, result); + return result; + } else { + EIGEN_ALIGN_MAX Scalar data[PacketSize]; + data[0] = Scalar(0); + convolve(startInputs[0], 0, NumKernelDims-1, data[0]); + for (int i = 1; i < PacketSize-1; ++i) { + data[i] = Scalar(0); + convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]); + } + data[PacketSize-1] = Scalar(0); + convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]); + return internal::pload(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } + startInput += index; + return startInput; + } + + EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolve(input, kernel, DimIndex-1, accum); + } else { + accum += m_inputImpl.coeff(input) * m_kernel[kernel]; + } + } + } + + template + EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolvePacket(input, kernel, DimIndex-1, accum); + } else { + accum = internal::pmadd(m_inputImpl.template packet(input), internal::pset1(m_kernel[kernel]), accum); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. 
it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool Vectorize = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + array m_inputStride; + array m_outputStride; + + array m_indexStride; + array m_kernelStride; + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + Dimensions m_dimensions; + + KernelArgType m_kernelArg; + const Scalar* m_kernel; + bool m_local_kernel; + const Device EIGEN_DEVICE_REF m_device; +}; + + + + +// Use an optimized implementation of the evaluation code for GPUs whenever possible. +#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) + +template +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const { + return StaticKernelSize; + } +}; +template <> +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const { + return kernelSize; + } +}; + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel1D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int kernelSize, float* buffer) { +#if defined(EIGEN_HIPCC) + HIP_DYNAMIC_SHARED(float, s) +#else + extern __shared__ float s[]; +#endif + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? 
first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSize); + const int num_x_output = last_x - first_x + 1; + + const int first_plane = blockIdx.y * blockDim.y; + const int plane_stride = blockDim.y * gridDim.y; + + for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { + // Load inputs to shared memory + const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.y * num_x_input; + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x); + s[i + plane_kernel_offset] = eval.coeff(tensor_index); + } + + __syncthreads(); + + // Compute the convolution + const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + const int kernel_offset = plane_kernel_offset + i; + float result = 0.0f; + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSize); ++k) { + result += s[k + kernel_offset] * kernel[k]; + } + const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x); + buffer[tensor_index] = result; + } + __syncthreads(); + } +}; + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel2D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int numY, const int maxY, const int kernelSizeX, + const int kernelSizeY, float* buffer) { +#if defined(EIGEN_HIPCC) + HIP_DYNAMIC_SHARED(float, s) +#else + extern __shared__ float s[]; +#endif + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSizeX); + const int num_x_output = last_x - first_x + 1; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? 
first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + GetKernelSize()(kernelSizeY); + const int num_y_output = last_y - first_y + 1; + + const int first_plane = blockIdx.z * blockDim.z; + const int plane_stride = blockDim.z * gridDim.z; + + for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { + + const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.z * num_y_input; + + // Load inputs to shared memory + #pragma unroll + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + const int input_offset = num_x_input * (j + plane_kernel_offset); + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y); + s[i + input_offset] = eval.coeff(tensor_index); + } + } + + __syncthreads(); + + // Convolution + const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + #pragma unroll + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + float result = 0.0f; + #pragma unroll + for (int l = 0; l < GetKernelSize()(kernelSizeY); ++l) { + const int kernel_offset = kernelSizeX * l; + const int input_offset = i + num_x_input * (j + l + plane_kernel_offset); + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSizeX); ++k) { + result += s[k + input_offset] * kernel[k + kernel_offset]; + } + } + const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y); + buffer[tensor_index] = result; + } + } + + __syncthreads(); + } +}; + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel3D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const size_t numPlanes, const size_t numX, + const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, + const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, + const size_t kernelSizeZ, float* buffer) { +#if defined(EIGEN_HIPCC) + HIP_DYNAMIC_SHARED(float, s) +#else + extern __shared__ float s[]; +#endif + + // Load inputs to shared memory + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + kernelSizeX; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + kernelSizeY; + + const int first_z = blockIdx.z * maxZ; + const int last_z = (first_z + maxZ < numZ ? 
first_z + maxZ : numZ) - 1; + const int num_z_input = last_z - first_z + kernelSizeZ; + + for (int p = 0; p < numPlanes; ++p) { + + const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = 0; + + for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z); + s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index); + } + } + } + + __syncthreads(); + + // Convolution + const int num_z_output = last_z - first_z + 1; + const int num_y_output = last_y - first_y + 1; + const int num_x_output = last_x - first_x + 1; + const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p); + + for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + float result = 0.0f; + for (int n = 0; n < kernelSizeZ; ++n) { + for (int m = 0; m < kernelSizeY; ++m) { + for (int l = 0; l < kernelSizeX; ++l) { + result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)]; + } + } + } + const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z); + buffer[tensor_index] = result; + } + } + } + __syncthreads(); + } +}; + + + +template +struct TensorEvaluator, GpuDevice> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename TensorEvaluator::Dimensions KernelDimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + TensorEvaluator(const XprType& op, const GpuDevice& device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + m_dimensions = m_inputImpl.dimensions(); + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + } + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + 
typedef typename InputArgType::Scalar Scalar; + static const int PacketSize = internal::unpacket_traits::size; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + preloadKernel(); + m_inputImpl.evalSubExprsIfNeeded(NULL); + if (data) { + executeEval(data); + return false; + } else { + m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)); + executeEval(m_buf); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_buf) { + m_device.deallocate(m_buf); + m_buf = NULL; + } + if (m_local_kernel) { + m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + static unsigned int ceil(unsigned int num, unsigned int denom) { + const unsigned int rounded_toward_zero = num / denom; + if (num > rounded_toward_zero * denom) { + return rounded_toward_zero + 1; + } + return rounded_toward_zero; + } + + void executeEval(Scalar* data) const { + typedef typename TensorEvaluator::Dimensions InputDims; + + const int maxSharedMem = m_device.sharedMemPerBlock(); + const int maxThreadsPerBlock = m_device.maxGpuThreadsPerBlock(); + const int maxBlocksPerProcessor = m_device.maxGpuThreadsPerMultiProcessor() / maxThreadsPerBlock; + const int numMultiProcessors = m_device.getNumGpuMultiProcessors(); + const int warpSize = 32; + + switch (NumKernelDims) { + case 1: { + const int kernel_size = m_kernelImpl.dimensions().TotalSize(); + + const int numX = dimensions()[m_indices[0]]; + const int numP = dimensions().TotalSize() / numX; + int maxX; + dim3 block_size; + + const int single_stride_dim = + static_cast(Layout) == static_cast(ColMajor) + ? 
0 + : m_inputImpl.dimensions().rank() - 1; + if (m_indices[0] == single_stride_dim) { + // Maximum the reuse + const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; + maxX = numext::mini(inner_dim, numX); + const int maxP = numext::mini(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); + block_size.x = numext::mini(maxThreadsPerBlock, maxX); + block_size.y = numext::mini(maxThreadsPerBlock / block_size.x, maxP); + } + else { + // Read as much as possible alongside the inner most dimension, that is the plane + const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); + const int maxP = numext::mini(inner_dim, numP); + maxX = numext::mini(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); + + block_size.x = numext::mini(warpSize, maxX); + block_size.y = numext::mini(maxThreadsPerBlock/block_size.x, maxP); + } + + const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); + gpu_assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); + + dim3 num_blocks(num_x_blocks, numext::mini(num_y_blocks, ceil(numP, block_size.y))); + + + //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[0]); + const array kernel_dims(m_kernelImpl.dimensions()[0]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch(kernel_size) { + case 4: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data); + break; + } + case 7: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data); + break; + } + default: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data); + } + } + break; + } + + case 2: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 1; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 
1 : 0; + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numP = dimensions().TotalSize() / (numX*numY); + + const float scaling_factor = sqrtf(static_cast(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); + + // Snap maxX to warp size + int inner_dim = ((static_cast(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; + const int maxX = numext::mini(inner_dim, numX); + const int maxY = numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); + const int maxP = numext::mini(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); + + dim3 block_size; + block_size.x = numext::mini(1024, maxX); + block_size.y = numext::mini(1024/block_size.x, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxP); + + const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); + gpu_assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int num_y_blocks = ceil(numY, maxY); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); + + dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini(num_z_blocks, ceil(numP, block_size.z))); + + + //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[idxX], m_indices[idxY]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch (kernel_size_x) { + case 4: { + switch (kernel_size_y) { + case 7: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data); + break; + } + default: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data); + break; + } + } + break; + } + case 7: { + switch (kernel_size_y) { + case 4: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data); + break; + } + default: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data); + break; + } + } + break; + } + default: { + LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data); + break; 
+ } + } + break; + } + + case 3: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 2; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 1; + const int idxZ = + static_cast(Layout) == static_cast(ColMajor) ? 2 : 0; + + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numZ = dimensions()[m_indices[idxZ]]; + const int numP = dimensions().TotalSize() / (numX*numY*numZ); + + const int maxX = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); + const int maxY = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); + const int maxZ = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); + + dim3 block_size; + block_size.x = numext::mini(32, maxX); + block_size.y = numext::mini(32, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxZ); + dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); + + const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); + gpu_assert(shared_mem <= maxSharedMem); + + //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + const array indices(m_indices[idxX], m_indices[idxY], + m_indices[idxZ]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY], + m_kernelImpl.dimensions()[idxZ]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + + LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); + break; + } + + default: { + EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return m_buf[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return internal::ploadt(m_buf+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost + // model. + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. 
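+    // Roughly: each output coefficient pays the index arithmetic once, plus,
+    // for each of the kernel_size taps, the cost of reading one input and one
+    // kernel coefficient and performing one multiply and one add.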
+ const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + private: + // No assignment (copies are needed by the kernels) + TensorEvaluator& operator = (const TensorEvaluator&); + + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + KernelArgType m_kernelArg; + Indices m_indices; + Dimensions m_dimensions; + Scalar* m_buf; + const Scalar* m_kernel; + bool m_local_kernel; + + const GpuDevice& m_device; +}; +#endif + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h new file mode 100644 index 0000000..033318f --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h @@ -0,0 +1,544 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// Copyright (C) 2016 Benoit Steiner + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H + +namespace Eigen { + +/** \class TensorConvolution + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor convolution class. 
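+ *
+ * The SYCL path below defines one kernel per rank (CONV1D, CONV2D, CONV3D).
+ * Each work-group first copies the input tile it needs (local range plus
+ * kernel size minus one along every convolved dimension) into local memory,
+ * synchronizes on a barrier, and then every in-range work-item accumulates
+ * the dot product of its window with the kernel and writes one output value.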
+ * + * + */ + +enum class convolution_type { CONV1D, CONV2D, CONV3D }; +template +struct EigenConvolutionKernel; +template +struct EigenConvolutionKernel { + typedef cl::sycl::accessor + Local_accessor; + Local_accessor local_acc; + Evaluator device_evaluator; + Kernel_accessor kernel_filter; + Buffer_accessor buffer_acc; + internal::IndexMapper indexMapper; + const size_t kernelSize; + const cl::sycl::range<2> input_range; + EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_, + Buffer_accessor buffer_acc_, + internal::IndexMapper indexMapper_, + const size_t kernelSize_, const cl::sycl::range<2> input_range_) + : local_acc(local_acc_), + device_evaluator(device_evaluator_), + kernel_filter(kernel_filter_), + buffer_acc(buffer_acc_), + indexMapper(indexMapper_), + kernelSize(kernelSize_), + input_range(input_range_) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim2 boolean_check) { + return (boolean_check[0] && boolean_check[1]); + } + void operator()(cl::sycl::nd_item<2> itemID) { + auto buffer_ptr = buffer_acc.get_pointer(); + auto kernel_ptr = kernel_filter.get_pointer(); + // the required row to be calculated for the for each plane in shered memory + const size_t num_input = (itemID.get_local_range()[0] + kernelSize - 1); + const size_t plane_kernel_offset = itemID.get_local_id(1) * num_input; + const size_t input_offset = itemID.get_group(0) * itemID.get_local_range()[0]; + const size_t plane_tensor_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(itemID.get_global_id(1)); + /// fill the shared memory + for (size_t i = itemID.get_local_id(0); i < num_input; i += itemID.get_local_range()[0]) { + const size_t local_index = i + plane_kernel_offset; + const size_t tensor_index = + plane_tensor_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i + input_offset); + + local_acc[local_index] = + (((i + input_offset) < (input_range[0] + kernelSize - 1)) && itemID.get_global_id(1) < input_range[1]) + ? 
device_evaluator.coeff(tensor_index) + : CoeffReturnType(0); + } + + itemID.barrier(cl::sycl::access::fence_space::local_space); + + // calculate the convolution // output start x + const size_t first_output_start = itemID.get_group(0) * (itemID.get_local_range()[0]); + if (boundary_check(itemID.get_global_id() < input_range)) { + CoeffReturnType result = static_cast(0); + const size_t index = plane_kernel_offset + itemID.get_local_id(0); + for (size_t k = 0; k < kernelSize; ++k) { + result += (local_acc[k + index] * kernel_ptr[k]); + } + const size_t tensor_index = + indexMapper.mapGpuOutputPlaneToTensorOutputOffset(itemID.get_global_id(1)) + + indexMapper.mapGpuOutputKernelToTensorOutputOffset(itemID.get_local_id(0) + first_output_start); + buffer_ptr[tensor_index] = result; + } + } +}; + +template +struct EigenConvolutionKernel { + typedef cl::sycl::accessor + Local_accessor; + Local_accessor local_acc; + Evaluator device_evaluator; + Kernel_accessor kernel_filter; + Buffer_accessor buffer_acc; + internal::IndexMapper indexMapper; + const cl::sycl::range<2> kernel_size; + const cl::sycl::range<3> input_range; + EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_, + Buffer_accessor buffer_acc_, + internal::IndexMapper indexMapper_, + const cl::sycl::range<2> kernel_size_, const cl::sycl::range<3> input_range_) + : local_acc(local_acc_), + device_evaluator(device_evaluator_), + kernel_filter(kernel_filter_), + buffer_acc(buffer_acc_), + indexMapper(indexMapper_), + kernel_size(kernel_size_), + input_range(input_range_) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim3 boolean_check) { + return (boolean_check[0] && boolean_check[1] && boolean_check[2]); + } + + void operator()(cl::sycl::nd_item<3> itemID) { + auto buffer_ptr = buffer_acc.get_pointer(); + auto kernel_ptr = kernel_filter.get_pointer(); + // the required row to be calculated for the for each plane in shered memory + const auto num_input = cl::sycl::range<2>{ + (cl::sycl::range<2>(itemID.get_local_range()[0], itemID.get_local_range()[1]) + kernel_size - 1)}; + + const size_t plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(itemID.get_global_id(2)); + const size_t plane_kernel_offset = itemID.get_local_id(2) * num_input[1]; + + const auto input_offset = cl::sycl::range<2>{itemID.get_group(0) * itemID.get_local_range()[0], + itemID.get_group(1) * itemID.get_local_range()[1]}; + + // fill the local memory + bool in_range_dim2 = itemID.get_global_id(2) < input_range[2]; + for (size_t j = itemID.get_local_id(1); j < num_input[1]; j += itemID.get_local_range()[1]) { + const size_t local_input_offset = num_input[0] * (j + plane_kernel_offset); + bool in_range_dim1 = ((j + input_offset[1]) < (input_range[1] + kernel_size[1] - 1)); + for (size_t i = itemID.get_local_id(0); i < num_input[0]; i += itemID.get_local_range()[0]) { + const size_t local_index = i + local_input_offset; + const size_t tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset( + i + input_offset[0], j + input_offset[1]); + local_acc[local_index] = (((i + input_offset[0]) < (input_range[0] + kernel_size[0] - 1)) && + in_range_dim1 && in_range_dim2) + ? 
device_evaluator.coeff(tensor_index) + : CoeffReturnType(0); + } + } + + itemID.barrier(cl::sycl::access::fence_space::local_space); + + // output offset start for each thread + const auto output_offset = cl::sycl::range<2>{itemID.get_group(0) * itemID.get_local_range()[0], + itemID.get_group(1) * itemID.get_local_range()[1]}; + + if (boundary_check(itemID.get_global_id() < input_range)) { + CoeffReturnType result = static_cast(0); + + for (size_t j = 0; j < kernel_size[1]; j++) { + size_t kernel_offset = kernel_size[0] * j; + const size_t index = + (num_input[0] * (plane_kernel_offset + j + itemID.get_local_id(1))) + itemID.get_local_id(0); + for (size_t i = 0; i < kernel_size[0]; i++) { + result += (local_acc[i + index] * kernel_ptr[i + kernel_offset]); + } + } + const size_t tensor_index = + indexMapper.mapGpuOutputPlaneToTensorOutputOffset(itemID.get_global_id(2)) + + indexMapper.mapGpuOutputKernelToTensorOutputOffset(itemID.get_local_id(0) + output_offset[0], + itemID.get_local_id(1) + output_offset[1]); + + buffer_ptr[tensor_index] = result; + } + } +}; + +template +struct EigenConvolutionKernel { + typedef cl::sycl::accessor + Local_accessor; + Local_accessor local_acc; + Evaluator device_evaluator; + Kernel_accessor kernel_filter; + Buffer_accessor buffer_acc; + internal::IndexMapper indexMapper; + const cl::sycl::range<3> kernel_size; + const cl::sycl::range<3> input_range; + const size_t numP; + + EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_, + Buffer_accessor buffer_acc_, + internal::IndexMapper indexMapper_, + const cl::sycl::range<3> kernel_size_, const cl::sycl::range<3> input_range_, + const size_t numP_) + : local_acc(local_acc_), + device_evaluator(device_evaluator_), + kernel_filter(kernel_filter_), + buffer_acc(buffer_acc_), + indexMapper(indexMapper_), + kernel_size(kernel_size_), + input_range(input_range_), + numP(numP_) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim3 boolean_check) { + return (boolean_check[0] && boolean_check[1] && boolean_check[2]); + } + void operator()(cl::sycl::nd_item<3> itemID) { + auto buffer_ptr = buffer_acc.get_pointer(); + auto kernel_ptr = kernel_filter.get_pointer(); + const auto num_input = cl::sycl::range<3>{itemID.get_local_range() + kernel_size - 1}; + + const auto input_offset = cl::sycl::range<3>{itemID.get_group().get_id() * itemID.get_local_range()}; + + const auto output_offset = + cl::sycl::range<3>{itemID.get_group().get_id() * itemID.get_local_range() + itemID.get_local_id()}; + + for (size_t p = 0; p < numP; p++) { + /// fill the shared memory + const size_t plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p); + for (size_t k = itemID.get_local_id(2); k < num_input[2]; k += itemID.get_local_range()[2]) { + size_t local_index_dim2 = num_input[0] * num_input[1] * k; + bool cond_k_dim = (k + input_offset[2] < (input_range[2] + kernel_size[2] - 1)); + for (size_t j = itemID.get_local_id(1); j < num_input[1]; j += itemID.get_local_range()[1]) { + bool cond_j_dim = cond_k_dim && (j + input_offset[1] < (input_range[1] + kernel_size[1] - 1)); + size_t local_index_dim1 = (num_input[0] * j) + local_index_dim2; + for (size_t i = itemID.get_local_id(0); i < num_input[0]; i += itemID.get_local_range()[0]) { + bool conds = cond_j_dim && (i + input_offset[0] < (input_range[0] + kernel_size[0] - 1)); + const size_t local_index = local_index_dim1 + i; + const size_t tensor_index = + plane_input_offset + 
indexMapper.mapGpuInputKernelToTensorInputOffset( + i + input_offset[0], j + input_offset[1], k + input_offset[2]); + local_acc[local_index] = conds ? device_evaluator.coeff(tensor_index) : CoeffReturnType(0); + } + } + } + itemID.barrier(cl::sycl::access::fence_space::local_space); + + // calculate the convolution + + if (boundary_check(itemID.get_global_id() < input_range)) { + CoeffReturnType result = static_cast(0); + for (size_t k = 0; k < kernel_size[2]; k++) { + for (size_t j = 0; j < kernel_size[1]; j++) { + for (size_t i = 0; i < kernel_size[0]; i++) { + const size_t kernel_index = i + kernel_size[0] * (j + kernel_size[1] * k); + const size_t local_index = + ((i + itemID.get_local_id(0)) + + num_input[0] * ((j + itemID.get_local_id(1)) + num_input[1] * (k + itemID.get_local_id(2)))); + + result += (local_acc[local_index] * kernel_ptr[kernel_index]); + } + } + } + const size_t tensor_index = + indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p) + + indexMapper.mapGpuOutputKernelToTensorOutputOffset(output_offset[0], output_offset[1], output_offset[2]); + buffer_ptr[tensor_index] = result; + } + + itemID.barrier(cl::sycl::access::fence_space::local_space); + } + } +}; + +template +struct TensorEvaluator, Eigen::SyclDevice> { + typedef TensorConvolutionOp XprType; + + static const int NumDims = + internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename TensorEvaluator::Dimensions KernelDimensions; + typedef const Eigen::SyclDevice Device; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename InputArgType::Scalar Scalar; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + typedef StorageMemory KernelStorage; + + enum { + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + TensorEvaluator(const XprType &op, const Eigen::SyclDevice &device) + : m_inputImpl(op.inputExpression(), device), + m_kernelArg(op.kernelExpression()), + m_kernelImpl(op.kernelExpression(), device), + m_indices(op.indices()), + m_buf(NULL), + m_kernel(NULL), + m_local_kernel(false), + m_device(device) { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == + static_cast(TensorEvaluator::Layout)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions &input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions &kernel_dims = + m_kernelImpl.dimensions(); + + m_dimensions = m_inputImpl.dimensions(); + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + } + } + + EIGEN_DEVICE_FUNC const Dimensions &dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + 
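// If the caller supplies a destination buffer in 'data', we evaluate directly
+    // into it and return false; otherwise we allocate a temporary device
+    // buffer, evaluate into that, and return true so cleanup() releases it.
+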
preloadKernel(); + m_inputImpl.evalSubExprsIfNeeded(NULL); + if (data) { + executeEval(data); + return false; + } else { + m_buf = (EvaluatorPointerType)m_device.get( + (Scalar *)m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); + executeEval(m_buf); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_buf) { + m_device.deallocate_temp(m_buf); + m_buf = NULL; + } + if (m_local_kernel) { + m_device.deallocate_temp(m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + /// used by sycl in order to build the sycl buffer + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device &device() const { return m_device; } + /// used by sycl in order to build the sycl buffer + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_buf; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + typename KernelStorage::Type in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + ptrdiff_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + EvaluatorPointerType local = (EvaluatorPointerType)m_device.get((Scalar *)m_device.allocate_temp(kernel_sz)); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(m_device.get(local), m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + m_kernel = local; + m_local_kernel = true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void executeEval(EvaluatorPointerType data) const { + typedef TensorEvaluator InputEvaluator; + typedef typename InputEvaluator::Dimensions InputDims; + switch (NumKernelDims) { + case 1: { + const size_t numX = dimensions()[m_indices[0]]; + const size_t numP = dimensions().TotalSize() / numX; + const auto input_dim = std::array{numX, numP}; + auto global_range = cl::sycl::range<2>{}; + auto local_range = cl::sycl::range<2>{}; + const size_t kernel_size = m_kernelImpl.dimensions().TotalSize(); + + m_device.parallel_for_setup(input_dim, global_range, local_range); + const size_t local_memory_size = (local_range[0] + kernel_size - 1) * (local_range[1]); + gpu_assert(static_cast(local_memory_size) <= m_device.sharedMemPerBlock()); + const array indices{{m_indices[0]}}; + const array kernel_dims{{m_kernelImpl.dimensions()[0]}}; + internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + + typedef EigenConvolutionKernel + ConvKernel; + + m_device.template binary_kernel_launcher( + m_inputImpl, m_kernel, data, cl::sycl::nd_range<2>(global_range, local_range), local_memory_size, + indexMapper, kernel_size, cl::sycl::range<2>(input_dim[0], input_dim[1])); + break; + } + + case 2: { + auto kernel_index = std::array{static_cast(Layout) == static_cast(ColMajor) ? 0 : 1, + static_cast(Layout) == static_cast(ColMajor) ? 
1 : 0}; + auto kernel_size = cl::sycl::range<2>{(size_t)m_kernelImpl.dimensions()[kernel_index[0]], + (size_t)m_kernelImpl.dimensions()[kernel_index[1]]}; + const size_t numX = dimensions()[m_indices[kernel_index[0]]]; + const size_t numY = dimensions()[m_indices[kernel_index[1]]]; + const size_t numP = dimensions().TotalSize() / (numX * numY); + auto input_dim = std::array{numX, numY, numP}; + + auto global_range = cl::sycl::range<3>{}; + auto local_range = cl::sycl::range<3>{}; + + m_device.parallel_for_setup(input_dim, global_range, local_range); + + const size_t local_memory_size = + (local_range[0] + kernel_size[0] - 1) * (local_range[1] + kernel_size[1] - 1) * local_range[2]; + gpu_assert(static_cast(local_memory_size) <= m_device.sharedMemPerBlock()); + const array indices{{m_indices[kernel_index[0]], m_indices[kernel_index[1]]}}; + const array kernel_dims{ + {m_kernelImpl.dimensions()[kernel_index[0]], m_kernelImpl.dimensions()[kernel_index[1]]}}; + internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + typedef EigenConvolutionKernel + ConvKernel; + m_device.template binary_kernel_launcher( + m_inputImpl, m_kernel, data, cl::sycl::nd_range<3>(global_range, local_range), local_memory_size, + indexMapper, kernel_size, cl::sycl::range<3>{input_dim[0], input_dim[1], input_dim[2]}); + break; + } + + case 3: { + auto kernel_index = std::array{static_cast(Layout) == static_cast(ColMajor) ? 0 : 2, + static_cast(Layout) == static_cast(ColMajor) ? 1 : 1, + static_cast(Layout) == static_cast(ColMajor) ? 2 : 0}; + + auto kernel_size = cl::sycl::range<3>{(size_t)m_kernelImpl.dimensions()[kernel_index[0]], + (size_t)m_kernelImpl.dimensions()[kernel_index[1]], + (size_t)m_kernelImpl.dimensions()[kernel_index[2]]}; + + const size_t numX = dimensions()[m_indices[kernel_index[0]]]; + const size_t numY = dimensions()[m_indices[kernel_index[1]]]; + const size_t numZ = dimensions()[m_indices[kernel_index[2]]]; + auto input_dim = std::array{numX, numY, numZ}; + const size_t numP = dimensions().TotalSize() / (numX * numY * numZ); + + const array indices{ + {m_indices[kernel_index[0]], m_indices[kernel_index[1]], m_indices[kernel_index[2]]}}; + const array kernel_dims{{m_kernelImpl.dimensions()[kernel_index[0]], + m_kernelImpl.dimensions()[kernel_index[1]], + m_kernelImpl.dimensions()[kernel_index[2]]}}; + + internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); + + auto global_range = cl::sycl::range<3>{}; + auto local_range = cl::sycl::range<3>{}; + + m_device.parallel_for_setup(input_dim, global_range, local_range); + auto local_memory_range = (local_range + kernel_size - 1); + const size_t local_memory_size = local_memory_range[0] * local_memory_range[1] * local_memory_range[2]; + + gpu_assert(static_cast(local_memory_size) <= m_device.sharedMemPerBlock()); + typedef EigenConvolutionKernel + ConvKernel; + m_device.template binary_kernel_launcher( + m_inputImpl, m_kernel, data, cl::sycl::nd_range<3>(global_range, local_range), local_memory_size, + indexMapper, kernel_size, cl::sycl::range<3>(input_dim[0], input_dim[1], input_dim[2]), numP); + break; + } + + default: { + EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_buf != NULL); + eigen_assert(index < m_dimensions.TotalSize()); + return m_buf[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
PacketReturnType packet(const Index index) const { + eigen_assert(m_buf != NULL); + eigen_assert(index < m_dimensions.TotalSize()); + return internal::ploadt(m_buf + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost + // model. + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, PacketSize)); + } + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_kernelImpl.bind(cgh); + m_inputImpl.bind(cgh); + m_buf.bind(cgh); + m_kernel.bind(cgh); + } + + private: + // No assignment (copies are needed by the kernels) + TensorEvaluator &operator=(const TensorEvaluator &); + TensorEvaluator m_inputImpl; + KernelArgType m_kernelArg; + TensorEvaluator m_kernelImpl; + Indices m_indices; + Dimensions m_dimensions; + EvaluatorPointerType m_buf; + typename KernelStorage::Type m_kernel; + bool m_local_kernel; + const Eigen::SyclDevice EIGEN_DEVICE_REF m_device; +}; // namespace Eigen + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h new file mode 100644 index 0000000..195267c --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -0,0 +1,214 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H +#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief A cost model used to limit the number of threads used for evaluating + * tensor expression. + * + */ + +// Class storing the cost of evaluating a tensor expression in terms of the +// estimated number of operand bytes loads, bytes stored, and compute cycles. +class TensorOpCost { + public: + // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple + // model based on minimal reciprocal throughput numbers from Intel or + // Agner Fog's tables would be better than what is there now. 
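+  // In practice an evaluator's costPerCoeff() returns something like
+  //   TensorOpCost(bytes_loaded, bytes_stored, compute_cycles, vectorized, PacketSize)
+  // and composite expressions combine their operands' costs with operator+
+  // and operator*, as the convolution evaluators in this module do.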
+ template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { + return internal::functor_traits< + internal::scalar_product_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { + return internal::functor_traits< + internal::scalar_quotient_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { + return internal::functor_traits< + internal::scalar_cast_op >::Cost; + } + + EIGEN_DEVICE_FUNC + TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(compute_cycles) {} + + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, + bool vectorized, double packet_size) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(vectorized ? compute_cycles / packet_size + : compute_cycles) { + eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); + eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); + eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { + return bytes_loaded_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const { + return bytes_stored_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const { + return compute_cycles_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost( + double load_cost, double store_cost, double compute_cost) const { + return load_cost * bytes_loaded_ + store_cost * bytes_stored_ + + compute_cost * compute_cycles_; + } + + // Drop memory access component. Intended for cases when memory accesses are + // sequential or are completely masked by computations. + EIGEN_DEVICE_FUNC void dropMemoryCost() { + bytes_loaded_ = 0; + bytes_stored_ = 0; + } + + // TODO(rmlarsen): Define min in terms of total cost, not elementwise. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + // TODO(rmlarsen): Define max in terms of total cost, not elementwise. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( + const TensorOpCost& rhs) { + bytes_loaded_ += rhs.bytes_loaded(); + bytes_stored_ += rhs.bytes_stored(); + compute_cycles_ += rhs.compute_cycles(); + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) { + bytes_loaded_ *= rhs; + bytes_stored_ *= rhs; + compute_cycles_ *= rhs; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+( + TensorOpCost lhs, const TensorOpCost& rhs) { + lhs += rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + TensorOpCost lhs, double rhs) { + lhs *= rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + double lhs, TensorOpCost rhs) { + rhs *= lhs; + return rhs; + } + + friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) { + return os << "[bytes_loaded = " << tc.bytes_loaded() + << ", bytes_stored = " << tc.bytes_stored() + << ", compute_cycles = " << tc.compute_cycles() << "]"; + } + + private: + double bytes_loaded_; + double bytes_stored_; + double compute_cycles_; +}; + +// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of theads +// in [1:max_threads] instead of just switching multi-threading off for small +// work units. +template +class TensorCostModel { + public: + // Scaling from Eigen compute cost to device cycles. + static const int kDeviceCyclesPerComputeCycle = 1; + + // Costs in device cycles. + static const int kStartupCycles = 100000; + static const int kPerThreadCycles = 100000; + static const int kTaskSize = 40000; + + // Returns the number of threads in [1:max_threads] to use for + // evaluating an expression with the given output size and cost per + // coefficient. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads( + double output_size, const TensorOpCost& cost_per_coeff, int max_threads) { + double cost = totalCost(output_size, cost_per_coeff); + double threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9; + // Make sure we don't invoke undefined behavior when we convert to an int. + threads = numext::mini(threads, GenericNumTraits::highest()); + return numext::mini(max_threads, + numext::maxi(1, static_cast(threads))); + } + + // taskSize assesses parallel task size. + // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task + // granularity needs to be increased to mitigate parallelization overheads. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize( + double output_size, const TensorOpCost& cost_per_coeff) { + return totalCost(output_size, cost_per_coeff) / kTaskSize; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost( + double output_size, const TensorOpCost& cost_per_coeff) { + // Cost of memory fetches from L2 cache. 64 is typical cache line size. + // 11 is L2 cache latency on Haswell. + // We don't know whether data is in L1, L2 or L3. 
But we are most interested + // in single-threaded computational time around 100us-10ms (smaller time + // is too small for parallelization, larger time is not interesting + // either because we are probably using all available threads already). + // And for the target time range, L2 seems to be what matters. Data set + // fitting into L1 is too small to take noticeable time. Data set fitting + // only into L3 presumably will take more than 10ms to load and process. + const double kLoadCycles = 1.0 / 64 * 11; + const double kStoreCycles = 1.0 / 64 * 11; + // Scaling from Eigen compute cost to device cycles. + return output_size * + cost_per_coeff.total_cost(kLoadCycles, kStoreCycles, + kDeviceCyclesPerComputeCycle); + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h new file mode 100644 index 0000000..95a8a84 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -0,0 +1,347 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H +#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H + +namespace Eigen { + +/** \class TensorCustomUnaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. + * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename XprType::Scalar Scalar; + typedef typename XprType::StorageKind StorageKind; + typedef typename XprType::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + typedef typename traits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomUnaryOpEIGEN_DEVICE_REF type; +}; + +template +struct nested > +{ + typedef TensorCustomUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) + : m_expr(expr), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomUnaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_expr; } + + protected: + typename XprType::Nested m_expr; + const CustomUnaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomUnaryOp ArgType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename 
PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename Eigen::internal::traits::PointerType TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.expression()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.get( (CoeffReturnType*) + m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)))); + evalTo(m_result); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + if (m_result) { + m_device.deallocate_temp(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_result.bind(cgh); + } +#endif + + protected: + void evalTo(EvaluatorPointerType data) { + TensorMap > result(m_device.get(data), m_dimensions); + m_op.func().eval(m_op.expression(), result, m_device); + } + + Dimensions m_dimensions; + const ArgType m_op; + const Device EIGEN_DEVICE_REF m_device; + EvaluatorPointerType m_result; +}; + + + +/** \class TensorCustomBinaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. 
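+ *
+ * The CustomBinaryFunc functor is expected to provide
+ *   dimensions(lhs_expr, rhs_expr) and eval(lhs_expr, rhs_expr, result, device);
+ * the evaluator below calls op.func().dimensions(...) when it is constructed
+ * and op.func().eval(...) when the result buffer is materialized.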
+ * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename internal::promote_storage_type::ret Scalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + typedef typename conditional::val, + typename traits::PointerType, typename traits::PointerType>::type PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomBinaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::traits::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) + + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomBinaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const CustomBinaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomBinaryOp XprType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + + typedef typename Eigen::internal::traits::PointerType TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType 
data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.get( (CoeffReturnType*) + m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)))); + evalTo(m_result); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate_temp(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_result.bind(cgh); + } +#endif + + protected: + void evalTo(EvaluatorPointerType data) { + TensorMap > result(m_device.get(data), m_dimensions); + m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); + } + + Dimensions m_dimensions; + const XprType m_op; + const Device EIGEN_DEVICE_REF m_device; + EvaluatorPointerType m_result; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h new file mode 100644 index 0000000..96fa46c --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -0,0 +1,137 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H + +namespace Eigen { + +/** \class TensorDevice + * \ingroup CXX11_Tensor_Module + * + * \brief Pseudo expression providing an operator = that will evaluate its argument + * on the specified computing 'device' (GPU, thread pool, ...) + * + * Example: + * C.device(EIGEN_GPU) = A + B; + * + * Todo: operator *= and /=. 
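+ *
+ * A thread-pool variant of the example above (a sketch, assuming
+ * EIGEN_USE_THREADS is defined; the names are illustrative):
+ *   Eigen::ThreadPool pool(4);
+ *   Eigen::ThreadPoolDevice tp_device(&pool, 4);
+ *   C.device(tp_device) = A + B;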
+ */ + +template class TensorDevice { + public: + TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} + + EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorDevice) + + template + EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { + typedef TensorAssignOp Assign; + Assign assign(m_expression, other); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + protected: + const DeviceType& m_device; + ExpressionType& m_expression; +}; + +/** \class TensorAsyncDevice + * \ingroup CXX11_Tensor_Module + * + * \brief Pseudo expression providing an operator = that will evaluate its + * argument asynchronously on the specified device. Currently only + * ThreadPoolDevice implements proper asynchronous execution, while the default + * and GPU devices just run the expression synchronously and call m_done() on + * completion.. + * + * Example: + * auto done = []() { ... expression evaluation done ... }; + * C.device(thread_pool_device, std::move(done)) = A + B; + */ + +template +class TensorAsyncDevice { + public: + TensorAsyncDevice(const DeviceType& device, ExpressionType& expression, + DoneCallback done) + : m_device(device), m_expression(expression), m_done(std::move(done)) {} + + template + EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { + typedef TensorAssignOp Assign; + typedef internal::TensorExecutor Executor; + + Assign assign(m_expression, other); + Executor::run(assign, m_device); + m_done(); + + return *this; + } + + protected: + const DeviceType& m_device; + ExpressionType& m_expression; + DoneCallback m_done; +}; + + +#ifdef EIGEN_USE_THREADS +template +class TensorAsyncDevice { + public: + TensorAsyncDevice(const ThreadPoolDevice& device, ExpressionType& expression, + DoneCallback done) + : m_device(device), m_expression(expression), m_done(std::move(done)) {} + + template + EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { + typedef TensorAssignOp Assign; + typedef internal::TensorAsyncExecutor Executor; + + // WARNING: After assignment 'm_done' callback will be in undefined state. 
+ Assign assign(m_expression, other); + Executor::runAsync(assign, m_device, std::move(m_done)); + + return *this; + } + + protected: + const ThreadPoolDevice& m_device; + ExpressionType& m_expression; + DoneCallback m_done; +}; +#endif + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h new file mode 100644 index 0000000..f779239 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -0,0 +1,6 @@ + +#if defined(__clang__) || defined(__GNUC__) +#warning "Deprecated header file, please either include the main Eigen/CXX11/Tensor header or the respective TensorDeviceGpu.h file" +#endif + +#include "TensorDeviceGpu.h" diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 0000000..46b9d3a --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,104 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return allocate(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + deallocate(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const { + return data; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#if !defined(EIGEN_GPU_COMPILE_PHASE) + // Running on the host CPU + return 1; +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + return 64; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { +#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY) + // Running on the host CPU + return l1CacheSize(); +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + return 48*1024; // FIXME : update this number for HIP +#else + // Running on a CUDA device, return the amount of shared memory available. 
+ return 48*1024; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { +#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY) + // Running single threaded on the host CPU + return l3CacheSize(); +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + return firstLevelCacheSize(); // FIXME : update this number for HIP +#else + // Running on a CUDA device + return firstLevelCacheSize(); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#if !defined(EIGEN_GPU_COMPILE_PHASE) + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#elif defined(EIGEN_HIP_DEVICE_COMPILE) + // Running on a HIP device + // return 1 as major for HIP + return 1; +#else + // Running on a CUDA device + return EIGEN_CUDA_ARCH / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h new file mode 100644 index 0000000..ec2e3cb --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h @@ -0,0 +1,389 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H + +// This header file container defines fo gpu* macros which will resolve to +// their equivalent hip* or cuda* versions depending on the compiler in use +// A separate header (included at the end of this file) will undefine all +#include "TensorGpuHipCudaDefines.h" + +namespace Eigen { + +static const int kGpuScratchSize = 1024; + +// This defines an interface that GPUDevice can take to use +// HIP / CUDA streams underneath. +class StreamInterface { + public: + virtual ~StreamInterface() {} + + virtual const gpuStream_t& stream() const = 0; + virtual const gpuDeviceProp_t& deviceProperties() const = 0; + + // Allocate memory on the actual device where the computation will run + virtual void* allocate(size_t num_bytes) const = 0; + virtual void deallocate(void* buffer) const = 0; + + // Return a scratchpad buffer of size 1k + virtual void* scratchpad() const = 0; + + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. + virtual unsigned int* semaphore() const = 0; +}; + +class GpuDeviceProperties { + public: + GpuDeviceProperties() : + initialized_(false), first_(true), device_properties_(nullptr) {} + + ~GpuDeviceProperties() { + if (device_properties_) { + delete[] device_properties_; + } + } + + EIGEN_STRONG_INLINE const gpuDeviceProp_t& get(int device) const { + return device_properties_[device]; + } + + EIGEN_STRONG_INLINE bool isInitialized() const { + return initialized_; + } + + void initialize() { + if (!initialized_) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. 
This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. + if (first_.exchange(false)) { + // We're the first thread to reach this point. + int num_devices; + gpuError_t status = gpuGetDeviceCount(&num_devices); + if (status != gpuSuccess) { + std::cerr << "Failed to get the number of GPU devices: " + << gpuGetErrorString(status) + << std::endl; + gpu_assert(status == gpuSuccess); + } + device_properties_ = new gpuDeviceProp_t[num_devices]; + for (int i = 0; i < num_devices; ++i) { + status = gpuGetDeviceProperties(&device_properties_[i], i); + if (status != gpuSuccess) { + std::cerr << "Failed to initialize GPU device #" + << i + << ": " + << gpuGetErrorString(status) + << std::endl; + gpu_assert(status == gpuSuccess); + } + } + + std::atomic_thread_fence(std::memory_order_release); + initialized_ = true; + } else { + // Wait for the other thread to inititialize the properties. + while (!initialized_) { + std::atomic_thread_fence(std::memory_order_acquire); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + } + } + } + + private: + volatile bool initialized_; + std::atomic first_; + gpuDeviceProp_t* device_properties_; +}; + +EIGEN_ALWAYS_INLINE const GpuDeviceProperties& GetGpuDeviceProperties() { + static GpuDeviceProperties* deviceProperties = new GpuDeviceProperties(); + if (!deviceProperties->isInitialized()) { + deviceProperties->initialize(); + } + return *deviceProperties; +} + +EIGEN_ALWAYS_INLINE const gpuDeviceProp_t& GetGpuDeviceProperties(int device) { + return GetGpuDeviceProperties().get(device); +} + +static const gpuStream_t default_stream = gpuStreamDefault; + +class GpuStreamDevice : public StreamInterface { + public: + // Use the default stream on the current device + GpuStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { + gpuGetDevice(&device_); + } + // Use the default stream on the specified device + GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {} + // Use the specified stream. Note that it's the + // caller responsibility to ensure that the stream can run on + // the specified device. If no device is specified the code + // assumes that the stream is associated to the current gpu device. 
+ GpuStreamDevice(const gpuStream_t* stream, int device = -1) + : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { + if (device < 0) { + gpuGetDevice(&device_); + } else { + int num_devices; + gpuError_t err = gpuGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + gpu_assert(device < num_devices); + device_ = device; + } + } + + virtual ~GpuStreamDevice() { + if (scratch_) { + deallocate(scratch_); + } + } + + const gpuStream_t& stream() const { return *stream_; } + const gpuDeviceProp_t& deviceProperties() const { + return GetGpuDeviceProperties(device_); + } + virtual void* allocate(size_t num_bytes) const { + gpuError_t err = gpuSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + void* result; + err = gpuMalloc(&result, num_bytes); + gpu_assert(err == gpuSuccess); + gpu_assert(result != NULL); + return result; + } + virtual void deallocate(void* buffer) const { + gpuError_t err = gpuSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + gpu_assert(buffer != NULL); + err = gpuFree(buffer); + gpu_assert(err == gpuSuccess); + } + + virtual void* scratchpad() const { + if (scratch_ == NULL) { + scratch_ = allocate(kGpuScratchSize + sizeof(unsigned int)); + } + return scratch_; + } + + virtual unsigned int* semaphore() const { + if (semaphore_ == NULL) { + char* scratch = static_cast(scratchpad()) + kGpuScratchSize; + semaphore_ = reinterpret_cast(scratch); + gpuError_t err = gpuMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + } + return semaphore_; + } + + private: + const gpuStream_t* stream_; + int device_; + mutable void* scratch_; + mutable unsigned int* semaphore_; +}; + +struct GpuDevice { + // The StreamInterface is not owned: the caller is + // responsible for its initialization and eventual destruction. + explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { + eigen_assert(stream); + } + // TODO(bsteiner): This is an internal API, we should not expose it. 
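For orientation, a minimal illustrative sketch of how the stream and device classes in this header fit together: a GpuStreamDevice supplies the stream, and the GpuDevice wrapper introduced just below borrows it and exposes the allocate/copy API used by the tensor evaluators. This is purely illustrative (it assumes EIGEN_USE_GPU, a CUDA or HIP toolchain, and the usual unsupported/Eigen include path; the function name and buffer size are hypothetical).

    // Illustrative sketch: round-trip a buffer through the GPU using the
    // stream/device pair defined in this header.
    #define EIGEN_USE_GPU
    #include <unsupported/Eigen/CXX11/Tensor>

    void gpu_roundtrip_example(const float* host_src, float* host_dst, size_t n) {
      Eigen::GpuStreamDevice stream;     // default stream on the current device
      Eigen::GpuDevice device(&stream);  // wrapper used by the Tensor evaluators

      void* dev_buf = device.allocate(n * sizeof(float));
      device.memcpyHostToDevice(dev_buf, host_src, n * sizeof(float));
      device.memcpyDeviceToHost(host_dst, dev_buf, n * sizeof(float));
      device.synchronize();              // the copies above are asynchronous
      device.deallocate(dev_buf);
    }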
+ EIGEN_STRONG_INLINE const gpuStream_t& stream() const { + return stream_->stream(); + } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + stream_->deallocate(buffer); + } + + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + stream_->deallocate(buffer); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const { + return data; + } + + EIGEN_STRONG_INLINE void* scratchpad() const { + return stream_->scratchpad(); + } + + EIGEN_STRONG_INLINE unsigned int* semaphore() const { + return stream_->semaphore(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { +#ifndef EIGEN_GPU_COMPILE_PHASE + gpuError_t err = gpuMemcpyAsync(dst, src, n, gpuMemcpyDeviceToDevice, + stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); +#else + EIGEN_UNUSED_VARIABLE(dst); + EIGEN_UNUSED_VARIABLE(src); + EIGEN_UNUSED_VARIABLE(n); + eigen_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + gpuError_t err = + gpuMemcpyAsync(dst, src, n, gpuMemcpyHostToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + } + + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + gpuError_t err = + gpuMemcpyAsync(dst, src, n, gpuMemcpyDeviceToHost, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { +#ifndef EIGEN_GPU_COMPILE_PHASE + gpuError_t err = gpuMemsetAsync(buffer, c, n, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + gpu_assert(err == gpuSuccess); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + // FIXME + return 32; + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + // FIXME + return 48*1024; + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on hip/cuda devices. 
+ return firstLevelCacheSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { +#ifndef EIGEN_GPU_COMPILE_PHASE + gpuError_t err = gpuStreamSynchronize(stream_->stream()); + if (err != gpuSuccess) { + std::cerr << "Error detected in GPU stream: " + << gpuGetErrorString(err) + << std::endl; + gpu_assert(err == gpuSuccess); + } +#else + gpu_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE int getNumGpuMultiProcessors() const { + return stream_->deviceProperties().multiProcessorCount; + } + EIGEN_STRONG_INLINE int maxGpuThreadsPerBlock() const { + return stream_->deviceProperties().maxThreadsPerBlock; + } + EIGEN_STRONG_INLINE int maxGpuThreadsPerMultiProcessor() const { + return stream_->deviceProperties().maxThreadsPerMultiProcessor; + } + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { + return stream_->deviceProperties().sharedMemPerBlock; + } + EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return stream_->deviceProperties().major; + } + EIGEN_STRONG_INLINE int minorDeviceVersion() const { + return stream_->deviceProperties().minor; + } + + EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + + // This function checks if the GPU runtime recorded an error for the + // underlying stream device. + inline bool ok() const { +#ifdef EIGEN_GPUCC + gpuError_t error = gpuStreamQuery(stream_->stream()); + return (error == gpuSuccess) || (error == gpuErrorNotReady); +#else + return false; +#endif + } + + private: + const StreamInterface* stream_; + int max_blocks_; +}; + +#if defined(EIGEN_HIPCC) + +#define LAUNCH_GPU_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ + hipLaunchKernelGGL(kernel, dim3(gridsize), dim3(blocksize), (sharedmem), (device).stream(), __VA_ARGS__); \ + gpu_assert(hipGetLastError() == hipSuccess); + +#else + +#define LAUNCH_GPU_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ + (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ + gpu_assert(cudaGetLastError() == cudaSuccess); + +#endif + +// FIXME: Should be device and kernel specific. +#ifdef EIGEN_GPUCC +static EIGEN_DEVICE_FUNC inline void setGpuSharedMemConfig(gpuSharedMemConfig config) { +#ifndef EIGEN_GPU_COMPILE_PHASE + gpuError_t status = gpuDeviceSetSharedMemConfig(config); + EIGEN_UNUSED_VARIABLE(status) + gpu_assert(status == gpuSuccess); +#else + EIGEN_UNUSED_VARIABLE(config) +#endif +} +#endif + +} // end namespace Eigen + +// undefine all the gpu* macros we defined at the beginning of the file +#include "TensorGpuHipCudaUndefines.h" + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 0000000..df591c2 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,1048 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// Copyright (C) 2016 Benoit Steiner + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H +#include + +namespace Eigen { + +namespace TensorSycl { +namespace internal { + +/// Cache all the device information needed +struct SyclDeviceInfo { + SyclDeviceInfo(cl::sycl::queue queue) + : local_mem_type( + queue.get_device() + .template get_info()), + max_work_item_sizes( + queue.get_device() + .template get_info< + cl::sycl::info::device::max_work_item_sizes>()), + max_mem_alloc_size( + queue.get_device() + .template get_info< + cl::sycl::info::device::max_mem_alloc_size>()), + max_compute_units(queue.get_device() + .template get_info< + cl::sycl::info::device::max_compute_units>()), + max_work_group_size( + queue.get_device() + .template get_info< + cl::sycl::info::device::max_work_group_size>()), + local_mem_size( + queue.get_device() + .template get_info()), + platform_name(queue.get_device() + .get_platform() + .template get_info()), + device_name(queue.get_device() + .template get_info()), + device_vendor( + queue.get_device() + .template get_info()) {} + + cl::sycl::info::local_mem_type local_mem_type; + cl::sycl::id<3> max_work_item_sizes; + unsigned long max_mem_alloc_size; + unsigned long max_compute_units; + unsigned long max_work_group_size; + size_t local_mem_size; + std::string platform_name; + std::string device_name; + std::string device_vendor; +}; + +} // end namespace internal +} // end namespace TensorSycl + +typedef TensorSycl::internal::buffer_data_type_t buffer_scalar_t; +// All devices (even AMD CPU with intel OpenCL runtime) that support OpenCL and +// can consume SPIR or SPIRV can use the Eigen SYCL backend and consequently +// TensorFlow via the Eigen SYCL Backend. +EIGEN_STRONG_INLINE auto get_sycl_supported_devices() + -> decltype(cl::sycl::device::get_devices()) { +#ifdef EIGEN_SYCL_USE_DEFAULT_SELECTOR + return {cl::sycl::device(cl::sycl::default_selector())}; +#else + std::vector supported_devices; + auto platform_list = cl::sycl::platform::get_platforms(); + for (const auto &platform : platform_list) { + auto device_list = platform.get_devices(); + auto platform_name = + platform.template get_info(); + std::transform(platform_name.begin(), platform_name.end(), + platform_name.begin(), ::tolower); + for (const auto &device : device_list) { + auto vendor = device.template get_info(); + std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower); + bool unsupported_condition = + (device.is_cpu() && platform_name.find("amd") != std::string::npos && + vendor.find("apu") == std::string::npos) || + (platform_name.find("experimental") != std::string::npos) || + device.is_host(); + if (!unsupported_condition) { + supported_devices.push_back(device); + } + } + } + return supported_devices; +#endif +} + +class QueueInterface { + public: + /// Creating device by using cl::sycl::selector or cl::sycl::device. 
+ template + explicit QueueInterface( + const DeviceOrSelector &dev_or_sel, cl::sycl::async_handler handler, + unsigned num_threads = std::thread::hardware_concurrency()) + : m_queue(dev_or_sel, handler), +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + m_prog(m_queue.get_context(), get_sycl_supported_devices()), +#endif + m_thread_pool(num_threads), + m_device_info(m_queue) { +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + m_prog.build_with_kernel_type(); + auto f = [&](cl::sycl::handler &cgh) { + cgh.single_task(m_prog.get_kernel(), + [=]() {}) + }; + EIGEN_SYCL_TRY_CATCH(m_queue.submit(f)); +#endif + } + + template + explicit QueueInterface( + const DeviceOrSelector &dev_or_sel, + unsigned num_threads = std::thread::hardware_concurrency()) + : QueueInterface(dev_or_sel, + [this](cl::sycl::exception_list l) { + this->exception_caught_ = this->sycl_async_handler(l); + }, + num_threads) {} + +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + EIGEN_STRONG_INLINE cl::sycl::program &program() const { return m_prog; } +#endif + + /// Attach an existing buffer to the pointer map, Eigen will not reuse it + EIGEN_STRONG_INLINE void *attach_buffer( + cl::sycl::buffer &buf) const { + std::lock_guard lock(pmapper_mutex_); + return static_cast(pMapper.add_pointer(buf)); + } + + /// Detach previously attached buffer + EIGEN_STRONG_INLINE void detach_buffer(void *p) const { + std::lock_guard lock(pmapper_mutex_); + TensorSycl::internal::SYCLfree(p, pMapper); + } + + /// Allocating device pointer. This pointer is actually an 8 bytes host + /// pointer used as key to access the sycl device buffer. The reason is that + /// we cannot use device buffer as a pointer as a m_data in Eigen leafNode + /// expressions. So we create a key pointer to be used in Eigen expression + /// construction. When we convert the Eigen construction into the sycl + /// construction we use this pointer as a key in our buffer_map and we make + /// sure that we dedicate only one buffer only for this pointer. The device + /// pointer would be deleted by calling deallocate function. 
+ EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { +#if EIGEN_MAX_ALIGN_BYTES > 0 + size_t align = num_bytes % EIGEN_MAX_ALIGN_BYTES; + if (align > 0) { + num_bytes += EIGEN_MAX_ALIGN_BYTES - align; + } +#endif + std::lock_guard lock(pmapper_mutex_); + return TensorSycl::internal::SYCLmalloc(num_bytes, pMapper); + } + + EIGEN_STRONG_INLINE void *allocate_temp(size_t num_bytes) const { +#if EIGEN_MAX_ALIGN_BYTES > 0 + size_t align = num_bytes % EIGEN_MAX_ALIGN_BYTES; + if (align > 0) { + num_bytes += EIGEN_MAX_ALIGN_BYTES - align; + } +#endif + std::lock_guard lock(pmapper_mutex_); +#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS + if (scratch_buffers.empty()) { + return TensorSycl::internal::SYCLmalloc(num_bytes, pMapper); + ; + } else { + for (auto it = scratch_buffers.begin(); it != scratch_buffers.end();) { + auto buff = pMapper.get_buffer(*it); + if (buff.get_size() >= num_bytes) { + auto ptr = *it; + scratch_buffers.erase(it); + return ptr; + } else { + ++it; + } + } + return TensorSycl::internal::SYCLmalloc(num_bytes, pMapper); + } +#else + return TensorSycl::internal::SYCLmalloc(num_bytes, pMapper); +#endif + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, data_t> + get(data_t *data) const { + return get_range_accessor(data); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE data_t *get( + TensorSycl::internal::RangeAccess + data) const { + return static_cast(data.get_virtual_pointer()); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void *p) const { + std::lock_guard lock(pmapper_mutex_); +#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS + scratch_buffers.insert(p); +#else + TensorSycl::internal::SYCLfree(p, pMapper); +#endif + } + template + EIGEN_STRONG_INLINE void deallocate_temp( + const TensorSycl::internal::RangeAccess &p) const { + deallocate_temp(p.get_virtual_pointer()); + } + + /// This is used to deallocate the device pointer. p is used as a key inside + /// the map to find the device buffer and delete it. + EIGEN_STRONG_INLINE void deallocate(void *p) const { + std::lock_guard lock(pmapper_mutex_); + TensorSycl::internal::SYCLfree(p, pMapper); + } + + EIGEN_STRONG_INLINE void deallocate_all() const { + std::lock_guard lock(pmapper_mutex_); + TensorSycl::internal::SYCLfreeAll(pMapper); +#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS + scratch_buffers.clear(); +#endif + } + + /// The memcpyHostToDevice is used to copy the data from host to device + /// The destination pointer could be deleted before the copy happend which is + /// why a callback function is needed. By default if none is provided, the + /// function is blocking. + EIGEN_STRONG_INLINE void memcpyHostToDevice( + void *dst, const void *src, size_t n, + std::function callback) const { + static const auto write_mode = cl::sycl::access::mode::discard_write; + static const auto global_access = cl::sycl::access::target::global_buffer; + typedef cl::sycl::accessor + write_accessor; + if (n == 0) { + if (callback) callback(); + return; + } + n /= sizeof(buffer_scalar_t); + auto f = [&](cl::sycl::handler &cgh) { + write_accessor dst_acc = get_range_accessor(cgh, dst, n); + buffer_scalar_t const *ptr = static_cast(src); + auto non_deleter = [](buffer_scalar_t const *) {}; + std::shared_ptr s_ptr(ptr, non_deleter); + cgh.copy(s_ptr, dst_acc); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f)); + synchronize_and_callback(e, callback); + } + + /// The memcpyDeviceToHost is used to copy the data from device to host. 
+ /// The source pointer could be deleted before the copy happend which is + /// why a callback function is needed. By default if none is provided, the + /// function is blocking. + EIGEN_STRONG_INLINE void memcpyDeviceToHost( + void *dst, const void *src, size_t n, + std::function callback) const { + static const auto read_mode = cl::sycl::access::mode::read; + static const auto global_access = cl::sycl::access::target::global_buffer; + typedef cl::sycl::accessor + read_accessor; + if (n == 0) { + if (callback) callback(); + return; + } + n /= sizeof(buffer_scalar_t); + auto f = [&](cl::sycl::handler &cgh) { + read_accessor src_acc = get_range_accessor(cgh, src, n); + buffer_scalar_t *ptr = static_cast(dst); + auto non_deleter = [](buffer_scalar_t *) {}; + std::shared_ptr s_ptr(ptr, non_deleter); + cgh.copy(src_acc, s_ptr); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f)); + synchronize_and_callback(e, callback); + } + + /// The memcpy function. + /// No callback is required here as both arguments are on the device + /// and SYCL can handle the dependency. + EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { + static const auto read_mode = cl::sycl::access::mode::read; + static const auto write_mode = cl::sycl::access::mode::discard_write; + if (n == 0) { + return; + } + n /= sizeof(buffer_scalar_t); + auto f = [&](cl::sycl::handler &cgh) { + auto src_acc = get_range_accessor(cgh, src, n); + auto dst_acc = get_range_accessor(cgh, dst, n); + cgh.copy(src_acc, dst_acc); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f)); + async_synchronize(e); + } + + /// the memset function. + /// No callback is required here as both arguments are on the device + /// and SYCL can handle the dependency. + EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const { + static const auto write_mode = cl::sycl::access::mode::discard_write; + if (n == 0) { + return; + } + n /= sizeof(buffer_scalar_t); + auto f = [&](cl::sycl::handler &cgh) { + auto dst_acc = get_range_accessor(cgh, data, n); + // The cast to uint8_t is here to match the behaviour of the standard + // memset. The cast to buffer_scalar_t is needed to match the type of the + // accessor (in case buffer_scalar_t is not uint8_t) + cgh.fill(dst_acc, static_cast(static_cast(c))); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f)); + async_synchronize(e); + } + + /// Get a range accessor to the virtual pointer's device memory. This range + /// accessor will allow access to the memory from the pointer to the end of + /// the buffer. + /// + /// NOTE: Inside a kernel the range accessor will always be indexed from the + /// start of the buffer, so the offset in the accessor is only used by + /// methods like handler::copy and will not be available inside a kernel. 
+ template + EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess + get_range_accessor(const void *ptr) const { + static const auto global_access = cl::sycl::access::target::global_buffer; + static const auto is_place_holder = cl::sycl::access::placeholder::true_t; + typedef TensorSycl::internal::RangeAccess ret_type; + typedef const TensorSycl::internal::buffer_data_type_t *internal_ptr_t; + + std::lock_guard lock(pmapper_mutex_); + + auto original_buffer = pMapper.get_buffer(ptr); + const ptrdiff_t offset = pMapper.get_offset(ptr); + const ptrdiff_t typed_offset = offset / sizeof(T); + eigen_assert(typed_offset >= 0); + const auto typed_size = original_buffer.get_size() / sizeof(T); + auto buffer = original_buffer.template reinterpret< + typename Eigen::internal::remove_const::type>( + cl::sycl::range<1>(typed_size)); + const ptrdiff_t size = buffer.get_count() - typed_offset; + eigen_assert(size >= 0); + typedef cl::sycl::accessor::type, + 1, AcMd, global_access, is_place_holder> + placeholder_accessor_t; + const auto start_ptr = static_cast(ptr) - offset; + return ret_type(placeholder_accessor_t(buffer, cl::sycl::range<1>(size), + cl::sycl::id<1>(typed_offset)), + static_cast(typed_offset), + reinterpret_cast(start_ptr)); + } + + /// Get a range accessor to the virtual pointer's device memory with a + /// specified size. + template + EIGEN_STRONG_INLINE cl::sycl::accessor< + buffer_scalar_t, 1, AcMd, cl::sycl::access::target::global_buffer> + get_range_accessor(cl::sycl::handler &cgh, const void *ptr, + const Index n_bytes) const { + static const auto global_access = cl::sycl::access::target::global_buffer; + eigen_assert(n_bytes >= 0); + std::lock_guard lock(pmapper_mutex_); + auto buffer = pMapper.get_buffer(ptr); + const ptrdiff_t offset = pMapper.get_offset(ptr); + eigen_assert(offset >= 0); + eigen_assert(offset + n_bytes <= buffer.get_size()); + return buffer.template get_access( + cgh, cl::sycl::range<1>(n_bytes), cl::sycl::id<1>(offset)); + } + + /// Creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. If found it gets the accessor from it, if + /// not, the function then adds an entry by creating a sycl buffer for that + /// particular pointer. + template + EIGEN_STRONG_INLINE cl::sycl::accessor< + buffer_scalar_t, 1, AcMd, cl::sycl::access::target::global_buffer> + get_sycl_accessor(cl::sycl::handler &cgh, const void *ptr) const { + std::lock_guard lock(pmapper_mutex_); + return pMapper.get_buffer(ptr) + .template get_access( + cgh); + } + + EIGEN_STRONG_INLINE cl::sycl::buffer get_sycl_buffer( + const void *ptr) const { + std::lock_guard lock(pmapper_mutex_); + return pMapper.get_buffer(ptr); + } + + EIGEN_STRONG_INLINE ptrdiff_t get_offset(const void *ptr) const { + std::lock_guard lock(pmapper_mutex_); + return pMapper.get_offset(ptr); + } + + template + EIGEN_ALWAYS_INLINE void binary_kernel_launcher(const Lhs &lhs, + const Rhs &rhs, OutPtr outptr, + Range thread_range, + Index scratchSize, + T... 
var) const { + auto kernel_functor = [=](cl::sycl::handler &cgh) { + // binding the placeholder accessors to a commandgroup handler + lhs.bind(cgh); + rhs.bind(cgh); + outptr.bind(cgh); + typedef cl::sycl::accessor + LocalAccessor; + + LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh); + cgh.parallel_for( +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + program().template get_kernel(), +#endif + thread_range, sycl_kernel(scratch, lhs, rhs, outptr, var...)); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(kernel_functor)); + async_synchronize(e); + } + + template + EIGEN_ALWAYS_INLINE void unary_kernel_launcher(const InPtr &inptr, + OutPtr &outptr, + Range thread_range, + Index scratchSize, + T... var) const { + auto kernel_functor = [=](cl::sycl::handler &cgh) { + // binding the placeholder accessors to a commandgroup handler + inptr.bind(cgh); + outptr.bind(cgh); + typedef cl::sycl::accessor + LocalAccessor; + + LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh); + cgh.parallel_for( +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + program().template get_kernel(), +#endif + thread_range, sycl_kernel(scratch, inptr, outptr, var...)); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(kernel_functor)); + async_synchronize(e); + } + + template + EIGEN_ALWAYS_INLINE void nullary_kernel_launcher(const InPtr &inptr, + Range thread_range, + Index scratchSize, + T... var) const { + auto kernel_functor = [=](cl::sycl::handler &cgh) { + // binding the placeholder accessors to a commandgroup handler + inptr.bind(cgh); + typedef cl::sycl::accessor + LocalAccessor; + + LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh); + cgh.parallel_for( +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + program().template get_kernel(), +#endif + thread_range, sycl_kernel(scratch, inptr, var...)); + }; + cl::sycl::event e; + EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(kernel_functor)); + async_synchronize(e); + } + + + EIGEN_STRONG_INLINE void synchronize() const { +#ifdef EIGEN_EXCEPTIONS + m_queue.wait_and_throw(); +#else + m_queue.wait(); +#endif + } + + + EIGEN_STRONG_INLINE void async_synchronize(cl::sycl::event e) const { + set_latest_event(e); +#ifndef EIGEN_SYCL_ASYNC_EXECUTION + synchronize(); +#endif + } + + template + EIGEN_STRONG_INLINE void parallel_for_setup(Index n, Index &tileSize, + Index &rng, Index &GRange) const { + tileSize = static_cast(getNearestPowerOfTwoWorkGroupSize()); + tileSize = std::min(static_cast(EIGEN_SYCL_LOCAL_THREAD_DIM0 * + EIGEN_SYCL_LOCAL_THREAD_DIM1), + static_cast(tileSize)); + rng = n; + if (rng == 0) rng = static_cast(1); + GRange = rng; + if (tileSize > GRange) + tileSize = GRange; + else if (GRange > tileSize) { + Index xMode = static_cast(GRange % tileSize); + if (xMode != 0) GRange += static_cast(tileSize - xMode); + } + } + + /// This is used to prepare the number of threads and also the number of + /// threads per block for sycl kernels + template + EIGEN_STRONG_INLINE void parallel_for_setup( + const std::array &input_dim, cl::sycl::range<2> &global_range, + cl::sycl::range<2> &local_range) const { + std::array input_range = input_dim; + Index max_workgroup_Size = + static_cast(getNearestPowerOfTwoWorkGroupSize()); + max_workgroup_Size = + std::min(static_cast(EIGEN_SYCL_LOCAL_THREAD_DIM0 * + EIGEN_SYCL_LOCAL_THREAD_DIM1), + static_cast(max_workgroup_Size)); + Index pow_of_2 = static_cast(std::log2(max_workgroup_Size)); + local_range[1] = + static_cast(std::pow(2, static_cast(pow_of_2 / 2))); + input_range[1] = input_dim[1]; + if 
(input_range[1] == 0) input_range[1] = static_cast(1); + global_range[1] = input_range[1]; + if (local_range[1] > global_range[1]) + local_range[1] = global_range[1]; + else if (global_range[1] > local_range[1]) { + Index xMode = static_cast(global_range[1] % local_range[1]); + if (xMode != 0) + global_range[1] += static_cast(local_range[1] - xMode); + } + local_range[0] = static_cast(max_workgroup_Size / local_range[1]); + input_range[0] = input_dim[0]; + if (input_range[0] == 0) input_range[0] = static_cast(1); + global_range[0] = input_range[0]; + if (local_range[0] > global_range[0]) + local_range[0] = global_range[0]; + else if (global_range[0] > local_range[0]) { + Index xMode = static_cast(global_range[0] % local_range[0]); + if (xMode != 0) + global_range[0] += static_cast(local_range[0] - xMode); + } + } + + /// This is used to prepare the number of threads and also the number of + /// threads per block for sycl kernels + template + EIGEN_STRONG_INLINE void parallel_for_setup( + const std::array &input_dim, cl::sycl::range<3> &global_range, + cl::sycl::range<3> &local_range) const { + std::array input_range = input_dim; + Index max_workgroup_Size = + static_cast(getNearestPowerOfTwoWorkGroupSize()); + max_workgroup_Size = + std::min(static_cast(EIGEN_SYCL_LOCAL_THREAD_DIM0 * + EIGEN_SYCL_LOCAL_THREAD_DIM1), + static_cast(max_workgroup_Size)); + Index pow_of_2 = static_cast(std::log2(max_workgroup_Size)); + local_range[2] = + static_cast(std::pow(2, static_cast(pow_of_2 / 3))); + input_range[2] = input_dim[2]; + if (input_range[2] == 0) input_range[1] = static_cast(1); + global_range[2] = input_range[2]; + if (local_range[2] > global_range[2]) + local_range[2] = global_range[2]; + else if (global_range[2] > local_range[2]) { + Index xMode = static_cast(global_range[2] % local_range[2]); + if (xMode != 0) + global_range[2] += static_cast(local_range[2] - xMode); + } + pow_of_2 = static_cast( + std::log2(static_cast(max_workgroup_Size / local_range[2]))); + local_range[1] = + static_cast(std::pow(2, static_cast(pow_of_2 / 2))); + input_range[1] = input_dim[1]; + if (input_range[1] == 0) input_range[1] = static_cast(1); + global_range[1] = input_range[1]; + if (local_range[1] > global_range[1]) + local_range[1] = global_range[1]; + else if (global_range[1] > local_range[1]) { + Index xMode = static_cast(global_range[1] % local_range[1]); + if (xMode != 0) + global_range[1] += static_cast(local_range[1] - xMode); + } + local_range[0] = static_cast(max_workgroup_Size / + (local_range[1] * local_range[2])); + input_range[0] = input_dim[0]; + if (input_range[0] == 0) input_range[0] = static_cast(1); + global_range[0] = input_range[0]; + if (local_range[0] > global_range[0]) + local_range[0] = global_range[0]; + else if (global_range[0] > local_range[0]) { + Index xMode = static_cast(global_range[0] % local_range[0]); + if (xMode != 0) + global_range[0] += static_cast(local_range[0] - xMode); + } + } + + EIGEN_STRONG_INLINE bool has_local_memory() const { +#if !defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM) + return false; +#elif defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM) + return true; +#else + return m_device_info.local_mem_type == + cl::sycl::info::local_mem_type::local; +#endif + } + + EIGEN_STRONG_INLINE unsigned long max_buffer_size() const { + return m_device_info.max_mem_alloc_size; + } + + EIGEN_STRONG_INLINE unsigned long getNumSyclMultiProcessors() const { + return m_device_info.max_compute_units; + } + + EIGEN_STRONG_INLINE unsigned 
long maxSyclThreadsPerBlock() const { + return m_device_info.max_work_group_size; + } + + EIGEN_STRONG_INLINE cl::sycl::id<3> maxWorkItemSizes() const { + return m_device_info.max_work_item_sizes; + } + + /// No need for sycl it should act the same as CPU version + EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } + + EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { + // OpenCL doesnot have such concept + return 2; + } + + EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const { + return m_device_info.local_mem_size; + } + + // This function returns the nearest power of 2 Work-group size which is <= + // maximum device workgroup size. + EIGEN_STRONG_INLINE size_t getNearestPowerOfTwoWorkGroupSize() const { + return getPowerOfTwo(m_device_info.max_work_group_size, false); + } + + EIGEN_STRONG_INLINE std::string getPlatformName() const { + return m_device_info.platform_name; + } + + EIGEN_STRONG_INLINE std::string getDeviceName() const { + return m_device_info.device_name; + } + + EIGEN_STRONG_INLINE std::string getDeviceVendor() const { + return m_device_info.device_vendor; + } + + // This function returns the nearest power of 2 + // if roundup is true returns result>=wgsize + // else it return result <= wgsize + EIGEN_STRONG_INLINE size_t getPowerOfTwo(size_t wGSize, bool roundUp) const { + if (roundUp) --wGSize; + wGSize |= (wGSize >> 1); + wGSize |= (wGSize >> 2); + wGSize |= (wGSize >> 4); + wGSize |= (wGSize >> 8); + wGSize |= (wGSize >> 16); +#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_ARM64 || EIGEN_OS_WIN64 + wGSize |= (wGSize >> 32); +#endif + return ((!roundUp) ? (wGSize - (wGSize >> 1)) : ++wGSize); + } + + EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const { return m_queue; } + + // This function checks if the runtime recorded an error for the + // underlying stream device. 
+ EIGEN_STRONG_INLINE bool ok() const { + if (!exception_caught_) { + synchronize(); + } + return !exception_caught_; + } + + EIGEN_STRONG_INLINE cl::sycl::event get_latest_event() const { +#ifdef EIGEN_SYCL_STORE_LATEST_EVENT + std::lock_guard lock(event_mutex_); + return latest_events_[std::this_thread::get_id()]; +#else + eigen_assert(false); + return cl::sycl::event(); +#endif + } + + // destructor + ~QueueInterface() { + pMapper.clear(); +#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS + scratch_buffers.clear(); +#endif + } + + protected: + EIGEN_STRONG_INLINE void set_latest_event(cl::sycl::event e) const { +#ifdef EIGEN_SYCL_STORE_LATEST_EVENT + std::lock_guard lock(event_mutex_); + latest_events_[std::this_thread::get_id()] = e; +#else + EIGEN_UNUSED_VARIABLE(e); +#endif + } + + void synchronize_and_callback(cl::sycl::event e, + const std::function &callback) const { + set_latest_event(e); + if (callback) { + auto callback_ = [=]() { +#ifdef EIGEN_EXCEPTIONS + cl::sycl::event(e).wait_and_throw(); +#else + cl::sycl::event(e).wait(); +#endif + callback(); + }; + m_thread_pool.Schedule(std::move(callback_)); + } else { +#ifdef EIGEN_EXCEPTIONS + m_queue.wait_and_throw(); +#else + m_queue.wait(); +#endif + } + } + + bool sycl_async_handler(cl::sycl::exception_list exceptions) const { + bool exception_caught = false; + for (const auto &e : exceptions) { + if (e) { + exception_caught = true; + EIGEN_THROW_X(e); + } + } + return exception_caught; + } + + /// class members: + bool exception_caught_ = false; + + mutable std::mutex pmapper_mutex_; + +#ifdef EIGEN_SYCL_STORE_LATEST_EVENT + mutable std::mutex event_mutex_; + mutable std::unordered_map latest_events_; +#endif + + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer now depends on the lifespan of + /// SyclDevice. If a non-read-only pointer is needed to be accessed on the + /// host we should manually deallocate it. + mutable TensorSycl::internal::PointerMapper pMapper; +#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS + mutable std::unordered_set scratch_buffers; +#endif + /// sycl queue + mutable cl::sycl::queue m_queue; +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + mutable cl::sycl::program m_prog; +#endif + + /// The thread pool is used to wait on events and call callbacks + /// asynchronously + mutable Eigen::ThreadPool m_thread_pool; + + const TensorSycl::internal::SyclDeviceInfo m_device_info; +}; + +struct SyclDeviceBase { + /// QueueInterface is not owned. 
it is the caller's responsibility to destroy + /// it + const QueueInterface *m_queue_stream; + explicit SyclDeviceBase(const QueueInterface *queue_stream) + : m_queue_stream(queue_stream) {} + EIGEN_STRONG_INLINE const QueueInterface *queue_stream() const { + return m_queue_stream; + } +}; + +// Here is a sycl device struct which accept the sycl queue interface +// as an input +struct SyclDevice : public SyclDeviceBase { + explicit SyclDevice(const QueueInterface *queue_stream) + : SyclDeviceBase(queue_stream) {} + + // this is the accessor used to construct the evaluator + template + EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess + get_range_accessor(const void *ptr) const { + return queue_stream()->template get_range_accessor(ptr); + } + + // get sycl accessor + template + EIGEN_STRONG_INLINE cl::sycl::accessor< + buffer_scalar_t, 1, AcMd, cl::sycl::access::target::global_buffer> + get_sycl_accessor(cl::sycl::handler &cgh, const void *ptr) const { + return queue_stream()->template get_sycl_accessor(cgh, ptr); + } + + /// Accessing the created sycl device buffer for the device pointer + EIGEN_STRONG_INLINE cl::sycl::buffer get_sycl_buffer( + const void *ptr) const { + return queue_stream()->get_sycl_buffer(ptr); + } + + /// This is used to prepare the number of threads and also the number of + /// threads per block for sycl kernels + template + EIGEN_STRONG_INLINE void parallel_for_setup(Index n, Index &tileSize, + Index &rng, Index &GRange) const { + queue_stream()->parallel_for_setup(n, tileSize, rng, GRange); + } + + /// This is used to prepare the number of threads and also the number of + /// threads per block for sycl kernels + template + EIGEN_STRONG_INLINE void parallel_for_setup( + const std::array &input_dim, cl::sycl::range<2> &global_range, + cl::sycl::range<2> &local_range) const { + queue_stream()->parallel_for_setup(input_dim, global_range, local_range); + } + + /// This is used to prepare the number of threads and also the number of + /// threads per block for sycl kernels + template + EIGEN_STRONG_INLINE void parallel_for_setup( + const std::array &input_dim, cl::sycl::range<3> &global_range, + cl::sycl::range<3> &local_range) const { + queue_stream()->parallel_for_setup(input_dim, global_range, local_range); + } + + /// allocate device memory + EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { + return queue_stream()->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void *allocate_temp(size_t num_bytes) const { + return queue_stream()->allocate_temp(num_bytes); + } + + /// deallocate device memory + EIGEN_STRONG_INLINE void deallocate(void *p) const { + queue_stream()->deallocate(p); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void *buffer) const { + queue_stream()->deallocate_temp(buffer); + } + template + EIGEN_STRONG_INLINE void deallocate_temp( + const TensorSycl::internal::RangeAccess &buffer) const { + queue_stream()->deallocate_temp(buffer); + } + EIGEN_STRONG_INLINE void deallocate_all() const { + queue_stream()->deallocate_all(); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, data_t> + get(data_t *data) const { + return queue_stream()->get(data); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE data_t *get( + TensorSycl::internal::RangeAccess + data) const { + return queue_stream()->get(data); + } + + /// attach existing buffer + EIGEN_STRONG_INLINE void *attach_buffer( + cl::sycl::buffer &buf) const { + return queue_stream()->attach_buffer(buf); + } 
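To make the virtual-pointer scheme described for QueueInterface::allocate concrete, a minimal illustrative sketch of the SyclDevice wrapper follows. It assumes EIGEN_USE_SYCL, a pre-SYCL-2020 toolchain of the kind this header targets, and the usual unsupported/Eigen include path; the function name and buffer size are hypothetical.

    // Illustrative sketch: the pointer returned by allocate() is only a key
    // into the buffer map; it is meaningful when handed back to the same
    // device (memcpy*/deallocate), not as a raw device address.
    #define EIGEN_USE_SYCL
    #include <unsupported/Eigen/CXX11/Tensor>

    void sycl_roundtrip_example(const float* host_src, float* host_dst, size_t n) {
      cl::sycl::default_selector selector;
      Eigen::QueueInterface queue(selector);
      Eigen::SyclDevice device(&queue);

      void* key = device.allocate(n * sizeof(float));
      device.memcpyHostToDevice(static_cast<float*>(key), host_src, n * sizeof(float));
      device.memcpyDeviceToHost(host_dst, static_cast<const float*>(key), n * sizeof(float));
      device.synchronize();
      device.deallocate(key);
    }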
+ /// detach buffer + EIGEN_STRONG_INLINE void detach_buffer(void *p) const { + queue_stream()->detach_buffer(p); + } + EIGEN_STRONG_INLINE ptrdiff_t get_offset(const void *ptr) const { + return queue_stream()->get_offset(ptr); + } + + // some runtime conditions that can be applied here + EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } + + /// memcpyHostToDevice + template + EIGEN_STRONG_INLINE void memcpyHostToDevice( + Index *dst, const Index *src, size_t n, + std::function callback = {}) const { + queue_stream()->memcpyHostToDevice(dst, src, n, callback); + } + /// memcpyDeviceToHost + template + EIGEN_STRONG_INLINE void memcpyDeviceToHost( + void *dst, const Index *src, size_t n, + std::function callback = {}) const { + queue_stream()->memcpyDeviceToHost(dst, src, n, callback); + } + /// the memcpy function + template + EIGEN_STRONG_INLINE void memcpy(void *dst, const Index *src, size_t n) const { + queue_stream()->memcpy(dst, src, n); + } + /// the memset function + EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const { + queue_stream()->memset(data, c, n); + } + /// returning the sycl queue + EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const { + return queue_stream()->sycl_queue(); + } +#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS + EIGEN_STRONG_INLINE cl::sycl::program &program() const { + return queue_stream()->program(); + } +#endif + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { return 48 * 1024; } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on sycl devices. + return firstLevelCacheSize(); + } + EIGEN_STRONG_INLINE unsigned long getNumSyclMultiProcessors() const { + return queue_stream()->getNumSyclMultiProcessors(); + } + EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerBlock() const { + return queue_stream()->maxSyclThreadsPerBlock(); + } + EIGEN_STRONG_INLINE cl::sycl::id<3> maxWorkItemSizes() const { + return queue_stream()->maxWorkItemSizes(); + } + EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { + // OpenCL doesnot have such concept + return queue_stream()->maxSyclThreadsPerMultiProcessor(); + } + EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const { + return queue_stream()->sharedMemPerBlock(); + } + EIGEN_STRONG_INLINE size_t getNearestPowerOfTwoWorkGroupSize() const { + return queue_stream()->getNearestPowerOfTwoWorkGroupSize(); + } + + EIGEN_STRONG_INLINE size_t getPowerOfTwo(size_t val, bool roundUp) const { + return queue_stream()->getPowerOfTwo(val, roundUp); + } + /// No need for sycl it should act the same as CPU version + EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return queue_stream()->majorDeviceVersion(); + } + + EIGEN_STRONG_INLINE void synchronize() const { + queue_stream()->synchronize(); + } + EIGEN_STRONG_INLINE void async_synchronize( + cl::sycl::event e = cl::sycl::event()) const { + queue_stream()->async_synchronize(e); + } + EIGEN_STRONG_INLINE cl::sycl::event get_latest_event() const { + return queue_stream()->get_latest_event(); + } + + // This function checks if the runtime recorded an error for the + // underlying stream device. 
+ EIGEN_STRONG_INLINE bool ok() const { return queue_stream()->ok(); } + + EIGEN_STRONG_INLINE bool has_local_memory() const { + return queue_stream()->has_local_memory(); + } + EIGEN_STRONG_INLINE long max_buffer_size() const { + return queue_stream()->max_buffer_size(); + } + EIGEN_STRONG_INLINE std::string getPlatformName() const { + return queue_stream()->getPlatformName(); + } + EIGEN_STRONG_INLINE std::string getDeviceName() const { + return queue_stream()->getDeviceName(); + } + EIGEN_STRONG_INLINE std::string getDeviceVendor() const { + return queue_stream()->getDeviceVendor(); + } + template + EIGEN_ALWAYS_INLINE void binary_kernel_launcher(T... var) const { + queue_stream()->template binary_kernel_launcher( + var...); + } + template + EIGEN_ALWAYS_INLINE void unary_kernel_launcher(T... var) const { + queue_stream()->template unary_kernel_launcher( + var...); + } + + template + EIGEN_ALWAYS_INLINE void nullary_kernel_launcher(T... var) const { + queue_stream()->template nullary_kernel_launcher( + var...); + } +}; +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 0000000..e524b53 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,409 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template struct FunctionWrapperWithNotification +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + if (n) { + n->Notify(); + } + } +}; + +template struct FunctionWrapperWithBarrier +{ + static void run(Barrier* b, Function f, Args... args) { + f(args...); + if (b) { + b->Notify(); + } + } +}; + +template +static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { + if (n) { + n->Wait(); + } +} + +// An abstract interface to a device specific memory allocator. +class Allocator { + public: + virtual ~Allocator() {} + virtual void* allocate(size_t num_bytes) const = 0; + virtual void deallocate(void* buffer) const = 0; +}; + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) + : pool_(pool), num_threads_(num_cores), allocator_(allocator) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return allocator_ ? 
allocator_->allocate(num_bytes) + : internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + if (allocator_) { + allocator_->deallocate(buffer); + } else { + internal::aligned_free(buffer); + } + } + + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + deallocate(buffer); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const { + return data; + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { +#ifdef __ANDROID__ + ::memcpy(dst, src, n); +#else + // TODO(rmlarsen): Align blocks on cache lines. + // We have observed that going beyond 4 threads usually just wastes + // CPU cycles due to the threads competing for memory bandwidth, so we + // statically schedule at most 4 block copies here. + const size_t kMinBlockSize = 32768; + const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4); + if (n <= kMinBlockSize || num_threads < 2) { + ::memcpy(dst, src, n); + } else { + const char* src_ptr = static_cast(src); + char* dst_ptr = static_cast(dst); + const size_t blocksize = (n + (num_threads - 1)) / num_threads; + Barrier barrier(static_cast(num_threads - 1)); + // Launch the last 3 blocks on worker threads. + for (size_t i = 1; i < num_threads; ++i) { + enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] { + ::memcpy(dst_ptr + i * blocksize, src_ptr + i * blocksize, + numext::mini(blocksize, n - (i * blocksize))); + }); + } + // Launch the first block on the main thread. + ::memcpy(dst_ptr, src_ptr, blocksize); + barrier.Wait(); + } +#endif + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE int numThreads() const { + return num_threads_; + } + + // Number of theads available in the underlying thread pool. This number can + // be different from the value returned by numThreads(). + EIGEN_STRONG_INLINE int numThreadsInPool() const { + return pool_->NumThreads(); + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + return l1CacheSize(); + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // The l3 cache size is shared between all the cores. + return l3CacheSize() / num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template + EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, + Args&&... args) const { + Notification* n = new Notification(); + pool_->Schedule( + std::bind(&FunctionWrapperWithNotification::run, n, + std::move(f), args...)); + return n; + } + + template + EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, Function&& f, + Args&&... args) const { + pool_->Schedule( + std::bind(&FunctionWrapperWithBarrier::run, b, + std::move(f), args...)); + } + + template + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, + Args&&... 
args) const { + if (sizeof...(args) > 0) { + pool_->Schedule(std::bind(std::move(f), args...)); + } else { + pool_->Schedule(std::move(f)); + } + } + + // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if + // called from one of the threads in pool_. Returns -1 otherwise. + EIGEN_STRONG_INLINE int currentThreadId() const { + return pool_->CurrentThreadId(); + } + + // WARNING: This function is synchronous and will block the calling thread. + // + // Synchronous parallelFor executes f with [0, n) arguments in parallel and + // waits for completion. F accepts a half-open interval [first, last). Block + // size is chosen based on the iteration cost and resulting parallel + // efficiency. If block_align is not nullptr, it is called to round up the + // block size. + void parallelFor(Index n, const TensorOpCost& cost, + std::function block_align, + std::function f) const { + if (EIGEN_PREDICT_FALSE(n <= 0)){ + return; + // Compute small problems directly in the caller thread. + } else if (n == 1 || numThreads() == 1 || + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { + f(0, n); + return; + } + + // Compute block size and total count of blocks. + ParallelForBlock block = CalculateParallelForBlock(n, cost, block_align); + + // Recursively divide size into halves until we reach block_size. + // Division code rounds mid to block_size, so we are guaranteed to get + // block_count leaves that do actual computations. + Barrier barrier(static_cast(block.count)); + std::function handleRange; + handleRange = [=, &handleRange, &barrier, &f](Index firstIdx, + Index lastIdx) { + while (lastIdx - firstIdx > block.size) { + // Split into halves and schedule the second half on a different thread. + const Index midIdx = firstIdx + divup((lastIdx - firstIdx) / 2, block.size) * block.size; + pool_->Schedule([=, &handleRange]() { handleRange(midIdx, lastIdx); }); + lastIdx = midIdx; + } + // Single block or less, execute directly. + f(firstIdx, lastIdx); + barrier.Notify(); + }; + + if (block.count <= numThreads()) { + // Avoid a thread hop by running the root of the tree and one block on the + // main thread. + handleRange(0, n); + } else { + // Execute the root in the thread pool to avoid running work on more than + // numThreads() threads. + pool_->Schedule([=, &handleRange]() { handleRange(0, n); }); + } + + barrier.Wait(); + } + + // Convenience wrapper for parallelFor that does not align blocks. + void parallelFor(Index n, const TensorOpCost& cost, + std::function f) const { + parallelFor(n, cost, nullptr, std::move(f)); + } + + // WARNING: This function is asynchronous and will not block the calling thread. + // + // Asynchronous parallelFor executes f with [0, n) arguments in parallel + // without waiting for completion. When the last block finished, it will call + // 'done' callback. F accepts a half-open interval [first, last). Block size + // is chosen based on the iteration cost and resulting parallel efficiency. If + // block_align is not nullptr, it is called to round up the block size. + void parallelForAsync(Index n, const TensorOpCost& cost, + std::function block_align, + std::function f, + std::function done) const { + // Compute small problems directly in the caller thread. + if (n <= 1 || numThreads() == 1 || + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { + f(0, n); + done(); + return; + } + + // Compute block size and total count of blocks. 
+ ParallelForBlock block = CalculateParallelForBlock(n, cost, block_align); + + ParallelForAsyncContext* const ctx = + new ParallelForAsyncContext(block.count, std::move(f), std::move(done)); + + // Recursively divide size into halves until we reach block_size. + // Division code rounds mid to block_size, so we are guaranteed to get + // block_count leaves that do actual computations. + ctx->handle_range = [this, ctx, block](Index firstIdx, Index lastIdx) { + while (lastIdx - firstIdx > block.size) { + // Split into halves and schedule the second half on a different thread. + const Index midIdx = firstIdx + divup((lastIdx - firstIdx) / 2, block.size) * block.size; + pool_->Schedule( + [ctx, midIdx, lastIdx]() { ctx->handle_range(midIdx, lastIdx); }); + lastIdx = midIdx; + } + + // Single block or less, execute directly. + ctx->f(firstIdx, lastIdx); + + // Delete async context if it was the last block. + if (ctx->count.fetch_sub(1) == 1) delete ctx; + }; + + if (block.count <= numThreads()) { + // Avoid a thread hop by running the root of the tree and one block on the + // main thread. + ctx->handle_range(0, n); + } else { + // Execute the root in the thread pool to avoid running work on more than + // numThreads() threads. + pool_->Schedule([ctx, n]() { ctx->handle_range(0, n); }); + } + } + + // Convenience wrapper for parallelForAsync that does not align blocks. + void parallelForAsync(Index n, const TensorOpCost& cost, + std::function f, + std::function done) const { + parallelForAsync(n, cost, nullptr, std::move(f), std::move(done)); + } + + // Thread pool accessor. + ThreadPoolInterface* getPool() const { return pool_; } + + // Allocator accessor. + Allocator* allocator() const { return allocator_; } + + private: + typedef TensorCostModel CostModel; + + // For parallelForAsync we must keep passed in closures on the heap, and + // delete them only after `done` callback finished. + struct ParallelForAsyncContext { + ParallelForAsyncContext(Index block_count, + std::function block_f, + std::function done_callback) + : count(block_count), + f(std::move(block_f)), + done(std::move(done_callback)) {} + ~ParallelForAsyncContext() { done(); } + + std::atomic count; + std::function f; + std::function done; + + std::function handle_range; + }; + + struct ParallelForBlock { + Index size; // block size + Index count; // number of blocks + }; + + // Calculates block size based on (1) the iteration cost and (2) parallel + // efficiency. We want blocks to be not too small to mitigate parallelization + // overheads; not too large to mitigate tail effect and potential load + // imbalance and we also want number of blocks to be evenly dividable across + // threads. 
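The parallelFor contract documented above (the functor receives half-open [first, last) index ranges, with the block size derived from the per-item cost) is easiest to see with a small example. The sketch below is purely illustrative: it assumes EIGEN_USE_THREADS and the usual unsupported/Eigen include path, and the cost estimate is a guess rather than a measured value.

    // Illustrative sketch: fill a vector in parallel on a 4-thread pool.
    // parallelFor is synchronous, so capturing `out` by reference is safe;
    // the lambda may run concurrently on any worker thread.
    #define EIGEN_USE_THREADS
    #include <vector>
    #include <unsupported/Eigen/CXX11/Tensor>

    void fill_squares(std::vector<float>& out) {
      Eigen::ThreadPool pool(4);
      Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);

      const Eigen::TensorOpCost cost(/*bytes_loaded=*/sizeof(float),
                                     /*bytes_stored=*/sizeof(float),
                                     /*compute_cycles=*/1);
      device.parallelFor(static_cast<Eigen::Index>(out.size()), cost,
                         [&out](Eigen::Index first, Eigen::Index last) {
                           for (Eigen::Index i = first; i < last; ++i)
                             out[i] = static_cast<float>(i) * static_cast<float>(i);
                         });
    }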
+ ParallelForBlock CalculateParallelForBlock( + const Index n, const TensorOpCost& cost, + std::function block_align) const { + const double block_size_f = 1.0 / CostModel::taskSize(1, cost); + const Index max_oversharding_factor = 4; + Index block_size = numext::mini( + n, numext::maxi( + divup(n, max_oversharding_factor * numThreads()), + block_size_f)); + const Index max_block_size = numext::mini(n, 2 * block_size); + + if (block_align) { + Index new_block_size = block_align(block_size); + eigen_assert(new_block_size >= block_size); + block_size = numext::mini(n, new_block_size); + } + + Index block_count = divup(n, block_size); + + // Calculate parallel efficiency as fraction of total CPU time used for + // computations: + double max_efficiency = + static_cast(block_count) / + (divup(block_count, numThreads()) * numThreads()); + + // Now try to increase block size up to max_block_size as long as it + // doesn't decrease parallel efficiency. + for (Index prev_block_count = block_count; + max_efficiency < 1.0 && prev_block_count > 1;) { + // This is the next block size that divides size into a smaller number + // of blocks than the current block_size. + Index coarser_block_size = divup(n, prev_block_count - 1); + if (block_align) { + Index new_block_size = block_align(coarser_block_size); + eigen_assert(new_block_size >= coarser_block_size); + coarser_block_size = numext::mini(n, new_block_size); + } + if (coarser_block_size > max_block_size) { + break; // Reached max block size. Stop. + } + // Recalculate parallel efficiency. + const Index coarser_block_count = divup(n, coarser_block_size); + eigen_assert(coarser_block_count < prev_block_count); + prev_block_count = coarser_block_count; + const double coarser_efficiency = + static_cast(coarser_block_count) / + (divup(coarser_block_count, numThreads()) * numThreads()); + if (coarser_efficiency + 0.01 >= max_efficiency) { + // Taking it. + block_size = coarser_block_size; + block_count = coarser_block_count; + if (max_efficiency < coarser_efficiency) { + max_efficiency = coarser_efficiency; + } + } + } + + return {block_size, block_count}; + } + + ThreadPoolInterface* pool_; + int num_threads_; + Allocator* allocator_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h new file mode 100644 index 0000000..1a30e45 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H + +namespace Eigen { + +/** \internal + * + * \class TensorDimensionList + * \ingroup CXX11_Tensor_Module + * + * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. 
+ * + * \sa Tensor + */ + +template struct DimensionList { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const Index operator[] (const Index i) const { return i; } +}; + +namespace internal { + +template struct array_size > { + static const size_t value = Rank; +}; +template struct array_size > { + static const size_t value = Rank; +}; + +template const Index array_get(DimensionList&) { + return n; +} +template const Index array_get(const DimensionList&) { + return n; +} + + +#if EIGEN_HAS_CONSTEXPR +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; +template +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; + +#else +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl 
> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ + return false; + } +}; +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +#endif + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h new file mode 100644 index 0000000..f0f1e83 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -0,0 +1,490 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H + + +namespace Eigen { + +/** \internal + * + * \class TensorDimensions + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode and store the dimensions of a Tensor. + * + * The Sizes class encodes as part of the type the number of dimensions and the + * sizes corresponding to each dimension. It uses no storage space since it is + * entirely known at compile time. + * The DSizes class is its dynamic sibling: the number of dimensions is known + * at compile time but the sizes are set during execution. + * + * \sa Tensor + */ + +// Boilerplate code +namespace internal { + +template struct dget { + static const std::ptrdiff_t value = get::value; +}; + + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Index run(array const& indices, + const Dimensions& dimensions) + { + return array_get(indices) + + dget::value * + fixed_size_tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Index run(array const&, const Dimensions&) + { + return 0; + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Index run(const Index index, + const Dimensions& dimensions) + { + const Index mult = (index == n-1) ? 
1 : 0; + return array_get(dimensions) * mult + + fixed_size_tensor_index_extraction_helper::run(index, dimensions); + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Index run(const Index, + const Dimensions&) + { + return 0; + } + }; + +} // end namespace internal + + +// Fixed size +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct Sizes { + typedef internal::numeric_list Base; + const Base t = Base(); + static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); + static const ptrdiff_t count = Base::count; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { + return Base::count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() { + return internal::arg_prod(Indices...); + } + + EIGEN_DEVICE_FUNC Sizes() { } + template + explicit EIGEN_DEVICE_FUNC Sizes(const array& /*indices*/) { + // todo: add assertion + } +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } + explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list /*l*/) { + // todo: add assertion + } +#endif + + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const { + return internal::fixed_size_tensor_index_extraction_helper::run(index, t); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ptrdiff_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, t); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ptrdiff_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, t); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + +#else + +template +struct non_zero_size { + typedef internal::type2val type; +}; +template <> +struct non_zero_size<0> { + typedef internal::null_type type; +}; + +template struct Sizes { + typedef typename internal::make_type_list::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type >::type Base; + static const std::ptrdiff_t count = Base::count; + static const std::ptrdiff_t total_size = internal::arg_prod::value; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const { + return count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() { + return internal::arg_prod::value; + } + + Sizes() { } + template + explicit Sizes(const array& /*indices*/) { + // todo: add assertion + } + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template Sizes(DenseIndex... 
/*indices*/) { } + explicit Sizes(std::initializer_list) { + // todo: add assertion + } +#else + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const { + switch (index) { + case 0: + return internal::get<0, Base>::value; + case 1: + return internal::get<1, Base>::value; + case 2: + return internal::get<2, Base>::value; + case 3: + return internal::get<3, Base>::value; + case 4: + return internal::get<4, Base>::value; + default: + eigen_assert(false && "index overflow"); + return static_cast(-1); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ptrdiff_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ptrdiff_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + +#endif + +// Boilerplate +namespace internal { +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + + +// Dynamic size +template +struct DSizes : array { + typedef array Base; + static const int count = NumDims; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { + return NumDims; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { + return (NumDims == 0) ? 1 : internal::array_prod(*static_cast(this)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = 0; + } + } + EIGEN_DEVICE_FUNC explicit DSizes(const array& a) : Base(a) { } + + EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { + eigen_assert(NumDims == 1); + (*this)[0] = i0; + } + + EIGEN_DEVICE_FUNC DSizes(const DimensionList& a) { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = a[i]; + } + } + + // Enable DSizes index type promotion only if we are promoting to the + // larger type, e.g. allow to promote dimensions of type int to long. + template + EIGEN_DEVICE_FUNC + explicit DSizes(const array& other, + // Default template parameters require c++11. 
+ typename internal::enable_if< + internal::is_same< + DenseIndex, + typename internal::promote_index_type< + DenseIndex, + OtherIndex + >::type + >::value, void*>::type = 0) { + for (int i = 0; i < NumDims; ++i) { + (*this)[i] = static_cast(other[i]); + } + } + +#ifdef EIGEN_HAS_INDEX_LIST + template + EIGEN_DEVICE_FUNC + explicit DSizes(const Eigen::IndexList& dimensions) { + for (int i = 0; i < dimensions.count; ++i) { + (*this)[i] = dimensions[i]; + } + } +#endif + +#ifndef EIGEN_EMULATE_CXX11_META_H + template + EIGEN_DEVICE_FUNC DSizes(const Sizes& a) { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = a[i]; + } + } +#else + template + EIGEN_DEVICE_FUNC DSizes(const Sizes& a) { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = a[i]; + } + } +#endif + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { + eigen_assert(NumDims == 2); + (*this)[0] = i0; + (*this)[1] = i1; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + eigen_assert(NumDims == 3); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + eigen_assert(NumDims == 4); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + eigen_assert(NumDims == 5); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + (*this)[4] = i4; + } +#endif + + EIGEN_DEVICE_FUNC DSizes& operator = (const array& other) { + *static_cast(this) = other; + return *this; + } + + // A constexpr would be so much better here + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } +}; + +template +std::ostream& operator<<(std::ostream& os, + const DSizes& dims) { + os << "["; + for (int i = 0; i < NumDims; ++i) { + if (i > 0) os << ", "; + os << dims[i]; + } + os << "]"; + return os; +} + +// Boilerplate +namespace internal { +template +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_vsize_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + +namespace internal { + +template struct array_size > { + static const ptrdiff_t value = NumDims; +}; +template struct array_size > { + static const ptrdiff_t value = NumDims; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template struct array_size > { +static 
const std::ptrdiff_t value = Sizes::count; +}; +template struct array_size > { +static const std::ptrdiff_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { + return get >::value; +} +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { + eigen_assert(false && "should never be called"); + return -1; +} +#else +template struct array_size > { + static const ptrdiff_t value = Sizes::count; +}; +template struct array_size > { + static const ptrdiff_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { + return get::Base>::value; +} + +#endif + + +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { + return false; + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { + return (array_get(dims1) == array_get(dims2)) && + sizes_match_below_dim::run(dims1, dims2); + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { + return true; + } +}; + +} // end namespace internal + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool dimensions_match(Dims1 dims1, Dims2 dims2) { + return internal::sizes_match_below_dim::value, internal::array_size::value>::run(dims1, dims2); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h new file mode 100644 index 0000000..a48d035 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +namespace internal { +template class MakePointer_> +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename MakePointer_::Type PointerType; + + enum { + Flags = 0 + }; + template + struct MakePointer { + // Intermediate typedef to workaround MSVC issue. 
+ typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + + + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorEvalToOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorEvalToOp type; +}; + +} // end namespace internal + + + + +template class MakePointer_> +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename MakePointer_::Type PointerType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + static const int NumDims = Eigen::internal::traits::NumDimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) + : m_xpr(expr), m_buffer(buffer) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } + + protected: + typename XprType::Nested m_xpr; + PointerType m_buffer; +}; + + + +template class MakePointer_> +struct TensorEvaluator, Device> +{ + typedef TensorEvalToOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename Eigen::internal::traits::PointerType TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = true, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + static const int NumDims = internal::traits::NumDimensions; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + typedef internal::TensorBlockAssignment< + CoeffReturnType, NumDims, typename ArgTensorBlock::XprType, Index> + TensorBlockAssignment; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_buffer(device.get(op.buffer())), m_expression(op.expression()){} + + + EIGEN_STRONG_INLINE ~TensorEvaluator() { + } + + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType scalar) { + EIGEN_UNUSED_VARIABLE(scalar); + eigen_assert(scalar == NULL); + return m_impl.evalSubExprsIfNeeded(m_buffer); + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType scalar, EvalSubExprsCallback done) { + EIGEN_UNUSED_VARIABLE(scalar); + eigen_assert(scalar == NULL); + m_impl.evalSubExprsIfNeededAsync(m_buffer, std::move(done)); + } +#endif + + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_buffer[i] = m_impl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + internal::pstoret(m_buffer + i, m_impl.template packet::IsAligned ? Aligned : Unaligned>(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return m_impl.getResourceRequirements(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock( + TensorBlockDesc& desc, TensorBlockScratch& scratch) { + // Add `m_buffer` as destination buffer to the block descriptor. + desc.template AddDestinationBuffer( + /*dst_base=*/m_buffer + desc.offset(), + /*dst_strides=*/internal::strides(m_impl.dimensions())); + + ArgTensorBlock block = + m_impl.block(desc, scratch, /*root_of_expr_ast=*/true); + + // If block was evaluated into a destination buffer, there is no need to do + // an assignment. + if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) { + TensorBlockAssignment::Run( + TensorBlockAssignment::target( + desc.dimensions(), internal::strides(m_impl.dimensions()), + m_buffer, desc.offset()), + block.expr()); + } + block.cleanup(); + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here. + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + m_buffer.bind(cgh); + } + #endif + + + private: + TensorEvaluator m_impl; + EvaluatorPointerType m_buffer; + const ArgType m_expression; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h new file mode 100644 index 0000000..3aff7fa --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -0,0 +1,983 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor evaluator classes. + * + * These classes are responsible for the evaluation of the tensor expression. + * + * TODO: add support for more types of expressions, in particular expressions + * leading to lvalues (slicing, reshaping, etc...) 
+ */ + +// Generic evaluator +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + typedef Derived XprType; + static const int PacketSize = PacketType::size; + typedef typename internal::traits::template MakePointer::Type TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (PacketType::size > 1), + BlockAccess = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(device.get((const_cast(m.data())))), + m_dims(m.dimensions()), + m_device(device) + { } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) { + if (!NumTraits::type>::RequireInitialization && dest) { + m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); + return false; + } + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType dest, EvalSubExprsCallback done) { + // TODO(ezhulenev): ThreadPoolDevice memcpy is blockign operation. + done(evalSubExprsIfNeeded(dest)); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data != NULL); + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { + eigen_assert(m_data != NULL); + return m_data[index]; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt(m_data + index); + } + + // Return a packet starting at `index` where `umask` specifies which elements + // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for + // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding + // float element will be loaded, otherwise 0 will be loaded. + // Function has been templatized to enable Sfinae. 
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::enable_if::masked_load_available, PacketReturnTypeT>::type + partialPacket(Index index, typename internal::unpacket_traits::mask_t umask) const + { + return internal::ploadu(m_data + index, umask); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + return internal::pstoret(m_data + index, x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { + eigen_assert(m_data != NULL); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& + coeffRef(const array& coords) { + eigen_assert(m_data != NULL); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + PacketType::size); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return internal::TensorBlockResourceRequirements::any(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + assert(m_data != NULL); + return TensorBlock::materialize(m_data, m_dims, desc, scratch); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlockDesc& desc, const TensorBlock& block) { + assert(m_data != NULL); + + typedef typename TensorBlock::XprType TensorBlockExpr; + typedef internal::TensorBlockAssignment + TensorBlockAssign; + + TensorBlockAssign::Run( + TensorBlockAssign::target(desc.dimensions(), + internal::strides(m_dims), m_data, + desc.offset()), + block.expr()); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } +#endif + protected: + EvaluatorPointerType m_data; + Dimensions m_dims; + const Device EIGEN_DEVICE_REF m_device; +}; + +namespace { +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T loadConstant(const T* address) { + return *address; +} +// Use the texture cache on CUDA devices whenever possible +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float loadConstant(const float* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double loadConstant(const double* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +Eigen::half loadConstant(const Eigen::half* address) { + return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); +} +#endif +#ifdef EIGEN_USE_SYCL +// overload of load constant should be implemented here based on range access +template +T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess &address) { + return *address; +} +#endif +} + + +// Default evaluator for rvalues +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename 
Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + typedef const Derived XprType; + typedef typename internal::traits::template MakePointer::Type TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + typedef typename internal::remove_const::type ScalarNoConst; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + static const int PacketSize = PacketType::size; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (PacketType::size > 1), + BlockAccess = internal::is_arithmetic::value, + PreferBlockAccess = false, + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + if (!NumTraits::type>::RequireInitialization && data) { + m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); + return false; + } + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType dest, EvalSubExprsCallback done) { + // TODO(ezhulenev): ThreadPoolDevice memcpy is a blockign operation. + done(evalSubExprsIfNeeded(dest)); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data != NULL); + return loadConstant(m_data+index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt_ro(m_data + index); + } + + // Return a packet starting at `index` where `umask` specifies which elements + // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for + // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding + // float element will be loaded, otherwise 0 will be loaded. + // Function has been templatized to enable Sfinae. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::enable_if::masked_load_available, PacketReturnTypeT>::type + partialPacket(Index index, typename internal::unpacket_traits::mask_t umask) const + { + return internal::ploadu(m_data + index, umask); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { + eigen_assert(m_data != NULL); + const Index index = (static_cast(Layout) == static_cast(ColMajor)) ? 
m_dims.IndexOfColMajor(coords) + : m_dims.IndexOfRowMajor(coords); + return loadConstant(m_data+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + PacketType::size); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return internal::TensorBlockResourceRequirements::any(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + assert(m_data != NULL); + return TensorBlock::materialize(m_data, m_dims, desc, scratch); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } +#endif + protected: + EvaluatorPointerType m_data; + Dimensions m_dims; + const Device EIGEN_DEVICE_REF m_device; +}; + + + + +// -------------------- CwiseNullaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseNullaryOp XprType; + + TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = true, + PacketAccess = internal::functor_traits::PacketAccess + #ifdef EIGEN_USE_SYCL + && (PacketType::size >1) + #endif + , + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + done(true); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_wrapper(m_functor, index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_wrapper.template packetOp(m_functor, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + PacketType::size); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_argImpl.bind(cgh); + } +#endif + + private: + const NullaryOp 
m_functor; + TensorEvaluator m_argImpl; + const internal::nullary_wrapper m_wrapper; +}; + + + +// -------------------- CwiseUnaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseUnaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = int(TensorEvaluator::PacketAccess) & + int(internal::functor_traits::PacketAccess), + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + TensorEvaluator(const XprType& op, const Device& device) + : m_device(device), + m_functor(op.functor()), + m_argImpl(op.nestedExpression(), device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNoConst; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + static const int NumDims = internal::array_size::value; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + typedef internal::TensorCwiseUnaryBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_argImpl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_argImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_argImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + static const double functor_cost = internal::functor_traits::Cost; + return m_argImpl.getResourceRequirements().addCostPerCoeff( + {0, 0, functor_cost / PacketSize}); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + return TensorBlock(m_argImpl.block(desc, scratch), m_functor); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void 
bind(cl::sycl::handler &cgh) const{ + m_argImpl.bind(cgh); + } +#endif + + + private: + const Device EIGEN_DEVICE_REF m_device; + const UnaryOp m_functor; + TensorEvaluator m_argImpl; +}; + + +// -------------------- CwiseBinaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseBinaryOp XprType; + + enum { + IsAligned = int(TensorEvaluator::IsAligned) & + int(TensorEvaluator::IsAligned), + PacketAccess = int(TensorEvaluator::PacketAccess) & + int(TensorEvaluator::PacketAccess) & + int(internal::functor_traits::PacketAccess), + BlockAccess = int(TensorEvaluator::BlockAccess) & + int(TensorEvaluator::BlockAccess), + PreferBlockAccess = int(TensorEvaluator::PreferBlockAccess) | + int(TensorEvaluator::PreferBlockAccess), + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + TensorEvaluator(const XprType& op, const Device& device) + : m_device(device), + m_functor(op.functor()), + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + static const int NumDims = internal::array_size< + typename TensorEvaluator::Dimensions>::value; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + LeftTensorBlock; + typedef typename TensorEvaluator::TensorBlock + RightTensorBlock; + + typedef internal::TensorCwiseBinaryBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use right impl instead if right impl dimensions are known at compile time. + return m_leftImpl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + // TODO(ezhulenev): Evaluate two expression in parallel? 
+ m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) { + m_rightImpl.evalSubExprsIfNeededAsync(nullptr, + [done](bool) { done(true); }); + }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_leftImpl.template packet(index), m_rightImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_leftImpl.costPerCoeff(vectorized) + + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + static const double functor_cost = internal::functor_traits::Cost; + return internal::TensorBlockResourceRequirements::merge( + m_leftImpl.getResourceRequirements(), + m_rightImpl.getResourceRequirements()) + .addCostPerCoeff({0, 0, functor_cost / PacketSize}); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + desc.DropDestinationBuffer(); + return TensorBlock(m_leftImpl.block(desc, scratch), + m_rightImpl.block(desc, scratch), m_functor); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_leftImpl.bind(cgh); + m_rightImpl.bind(cgh); + } + #endif + private: + const Device EIGEN_DEVICE_REF m_device; + const BinaryOp m_functor; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess && + TensorEvaluator::PacketAccess && + TensorEvaluator::PacketAccess && + internal::functor_traits::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + 
STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. + return m_arg1Impl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_arg1Impl.bind(cgh); + m_arg2Impl.bind(cgh); + m_arg3Impl.bind(cgh); + } +#endif + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + + +// -------------------- SelectOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorSelectOp XprType; + typedef typename XprType::Scalar Scalar; + + enum { + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + TensorEvaluator::PacketAccess & + PacketType::HasBlend, + BlockAccess = TensorEvaluator::BlockAccess && + TensorEvaluator::BlockAccess && + TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess || + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + TensorEvaluator(const XprType& op, const Device& device) + : m_condImpl(op.ifExpression(), device), + m_thenImpl(op.thenExpression(), device), + 
m_elseImpl(op.elseExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); + eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + static const int NumDims = internal::array_size::value; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + IfArgTensorBlock; + typedef typename TensorEvaluator::TensorBlock + ThenArgTensorBlock; + typedef typename TensorEvaluator::TensorBlock + ElseArgTensorBlock; + + struct TensorSelectOpBlockFactory { + template + struct XprType { + typedef TensorSelectOp type; + }; + + template + typename XprType::type expr( + const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const { + return typename XprType::type(if_expr, then_expr, else_expr); + } + }; + + typedef internal::TensorTernaryExprBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use then or else impl instead if they happen to be known at compile time. + return m_condImpl.dimensions(); + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_condImpl.evalSubExprsIfNeeded(NULL); + m_thenImpl.evalSubExprsIfNeeded(NULL); + m_elseImpl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_condImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) { + m_thenImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) { + m_elseImpl.evalSubExprsIfNeeded(nullptr, [done](bool) { done(true); }); + }); + }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_condImpl.cleanup(); + m_thenImpl.cleanup(); + m_elseImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_condImpl.coeff(index) ? 
m_thenImpl.coeff(index) : m_elseImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + internal::Selector select; + EIGEN_UNROLL_LOOP + for (Index i = 0; i < PacketSize; ++i) { + select.select[i] = m_condImpl.coeff(index+i); + } + return internal::pblend(select, + m_thenImpl.template packet(index), + m_elseImpl.template packet(index)); + + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_condImpl.costPerCoeff(vectorized) + + m_thenImpl.costPerCoeff(vectorized) + .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + auto then_req = m_thenImpl.getResourceRequirements(); + auto else_req = m_elseImpl.getResourceRequirements(); + + auto merged_req = + internal::TensorBlockResourceRequirements::merge(then_req, else_req); + merged_req.cost_per_coeff = + then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff); + + return internal::TensorBlockResourceRequirements::merge( + m_condImpl.getResourceRequirements(), merged_req); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + // It's unsafe to pass destination buffer to underlying expressions, because + // output might be aliased with one of the inputs. + desc.DropDestinationBuffer(); + + return TensorBlock( + m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch), + m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_condImpl.bind(cgh); + m_thenImpl.bind(cgh); + m_elseImpl.bind(cgh); + } +#endif + private: + TensorEvaluator m_condImpl; + TensorEvaluator m_thenImpl; + TensorEvaluator m_elseImpl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h new file mode 100644 index 0000000..c52fb77 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -0,0 +1,703 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H + +namespace Eigen { + +/** + * \class TensorExecutor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor executor class. + * + * This class is responsible for launch the evaluation of the expression on + * the specified computing device. + * + * @tparam Vectorizable can use packet math (SSE/AVX/etc... registers and + * instructions) + * @tparam Tiling can use block based tensor evaluation + * (see TensorBlock.h) + */ +namespace internal { + +/** + * Evaluating TensorBroadcastingOp via coefficient of packet path is extremely + * expensive. 
If expression has at least one broadcast op in it, and it supports + * block based evaluation, we always prefer it, even for the small tensors. For + * all other tileable ops, block evaluation overhead for small tensors (fits + * into L1) is too large, and we fallback on vectorized evaluation. + */ + +// TODO(ezhulenev): Add specializations for all other types of Tensor ops. + +template +struct ExpressionHasTensorBroadcastingOp { + enum { value = false }; +}; + +template +struct ExpressionHasTensorBroadcastingOp< + const TensorAssignOp > { + enum { value = ExpressionHasTensorBroadcastingOp::value }; +}; + +template +struct ExpressionHasTensorBroadcastingOp< + const TensorCwiseUnaryOp > { + enum { value = ExpressionHasTensorBroadcastingOp::value }; +}; + +template +struct ExpressionHasTensorBroadcastingOp< + const TensorCwiseBinaryOp > { + enum { + value = ExpressionHasTensorBroadcastingOp::value || + ExpressionHasTensorBroadcastingOp::value + }; +}; + +template +struct ExpressionHasTensorBroadcastingOp< + const TensorBroadcastingOp > { + enum { value = true }; +}; + +// -------------------------------------------------------------------------- // + +/** + * Default strategy: the expression is evaluated sequentially with a single cpu + * thread, without vectorization and block evaluation. + */ +template +class TensorExecutor { + public: + typedef typename Expression::Index StorageIndex; + + // Including `unsupported/Eigen/CXX11/Tensor` in different translation units + // with/without `EIGEN_USE_THREADS` or `EIGEN_USE_GPU` is a potential ODR + // violation. If this template is instantiated with a non-default device, it + // means that this header file was included without defining + // `EIGEN_USE_THREADS`, `EIGEN_USE_GPU` or `EIGEN_USE_SYCL`. + static_assert(std::is_same::value, + "Default executor instantiated with non-default device. " + "You must #define EIGEN_USE_THREADS, EIGEN_USE_GPU or " + "EIGEN_USE_SYCL before including Eigen headers."); + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(const Expression& expr, + const Device& device = Device()) { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + const StorageIndex size = array_prod(evaluator.dimensions()); + for (StorageIndex i = 0; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + +/** + * Default async execution strategy is not implemented. Currently it's only + * available for ThreadPoolDevice (see definition below). + */ +template +class TensorAsyncExecutor {}; + +/** + * Process all the data with a single cpu thread, using vectorized instructions. + */ +template +class TensorExecutor { + public: + typedef typename Expression::Index StorageIndex; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run( + const Expression& expr, const DefaultDevice& device = DefaultDevice()) { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + const StorageIndex size = array_prod(evaluator.dimensions()); + const int PacketSize = unpacket_traits::PacketReturnType>::size; + + // Give compiler a strong possibility to unroll the loop. But don't insist + // on unrolling, because if the function is expensive compiler should not + // unroll the loop at the expense of inlining. 
+ const StorageIndex UnrolledSize = + (size / (4 * PacketSize)) * 4 * PacketSize; + for (StorageIndex i = 0; i < UnrolledSize; i += 4 * PacketSize) { + for (StorageIndex j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + const StorageIndex VectorizedSize = (size / PacketSize) * PacketSize; + for (StorageIndex i = UnrolledSize; i < VectorizedSize; i += PacketSize) { + evaluator.evalPacket(i); + } + for (StorageIndex i = VectorizedSize; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + +/** + * Process all the data with a single cpu thread, using blocks of data. By + * sizing a block to fit L1 cache we get better cache performance. + */ +template +class TensorExecutor { + public: + typedef typename traits::Scalar Scalar; + typedef typename remove_const::type ScalarNoConst; + + typedef TensorEvaluator Evaluator; + typedef typename traits::Index StorageIndex; + + static const int NumDims = traits::NumDimensions; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(const Expression& expr, + const DefaultDevice& device = DefaultDevice()) { + typedef TensorBlockMapper + TensorBlockMapper; + + typedef internal::TensorBlockDescriptor + TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator + TensorBlockScratch; + + Evaluator evaluator(expr, device); + + // TODO(ezhulenev): Do not use tiling for small tensors? + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + + if (needs_assign) { + // Query expression tree for desired block size/shape. + const TensorBlockResourceRequirements requirements = + evaluator.getResourceRequirements(); + + const TensorBlockMapper block_mapper( + typename TensorBlockDesc::Dimensions(evaluator.dimensions()), + requirements); + + // Share scratch memory allocator between all blocks. + TensorBlockScratch scratch(device); + + const StorageIndex total_block_count = block_mapper.blockCount(); + for (StorageIndex i = 0; i < total_block_count; ++i) { + TensorBlockDesc desc = block_mapper.blockDescriptor(i); + evaluator.evalBlock(desc, scratch); + scratch.reset(); + } + } + evaluator.cleanup(); + } +}; + +/** + * Multicore strategy: the index space is partitioned and each partition is + * executed on a single core. + * + * (1) TensorExecutor will submit work to the ThreadPoolDevice managed thread + * pool, and will block the caller thread until all tasks are finished. + * + * (2) TensorAsyncExecutor is a non-blocking version, that will submit work to + * the ThreadPoolDevice managed thread pool, and will return immediately. + * It will call 'done' callback after all tasks are finished. + */ +#ifdef EIGEN_USE_THREADS + +template +struct TensorExecutorTilingContext { + TensorExecutorTilingContext() = default; + TensorExecutorTilingContext(const TensorBlockMapper& b_mapper, + const TensorOpCost& b_cost, size_t b_aligned_size) + : block_mapper(b_mapper), + cost(b_cost), + aligned_blocksize(b_aligned_size) {} + + TensorBlockMapper block_mapper; // navigate through blocks + TensorOpCost cost; // cost of computing a single block + size_t aligned_blocksize; // block size after memory alignment +}; + +// Computes a block evaluation parameters, and allocates temporary memory buffer +// for blocks. See TensorExecutor/TensorAsyncExecutor (Tiling=On) below. +template +TensorExecutorTilingContext GetTensorExecutorTilingContext( + const Evaluator& evaluator) { + // Query expression tree for desired block size/shape. 
+ TensorBlockResourceRequirements requirements = + evaluator.getResourceRequirements(); + + // Update target block size based on cost model. + double taskSize = TensorCostModel::taskSize( + 1, requirements.cost_per_coeff); + requirements.size = static_cast(1.0 / taskSize); + + TensorBlockMapper block_mapper( + typename TensorBlockMapper::Dimensions(evaluator.dimensions()), + requirements); + + size_t block_size = block_mapper.blockTotalSize(); + const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); + const size_t aligned_blocksize = + align * + divup(block_size * sizeof(typename Evaluator::Scalar), align); + + return {block_mapper, requirements.cost_per_coeff * block_size, + aligned_blocksize}; +} + +template +struct EvalRange { + static void run(Evaluator* evaluator_in, const StorageIndex firstIdx, + const StorageIndex lastIdx) { + Evaluator evaluator = *evaluator_in; + eigen_assert(lastIdx >= firstIdx); + for (StorageIndex i = firstIdx; i < lastIdx; ++i) { + evaluator.evalScalar(i); + } + } + + static StorageIndex alignBlockSize(StorageIndex size) { return size; } +}; + +template +struct EvalRange { + static const int PacketSize = + unpacket_traits::size; + + static void run(Evaluator* evaluator_in, const StorageIndex firstIdx, + const StorageIndex lastIdx) { + Evaluator evaluator = *evaluator_in; + eigen_assert(lastIdx >= firstIdx); + StorageIndex i = firstIdx; + if (lastIdx - firstIdx >= PacketSize) { + eigen_assert(firstIdx % PacketSize == 0); + StorageIndex last_chunk_offset = lastIdx - 4 * PacketSize; + // Give compiler a strong possibility to unroll the loop. But don't insist + // on unrolling, because if the function is expensive compiler should not + // unroll the loop at the expense of inlining. + for (; i <= last_chunk_offset; i += 4 * PacketSize) { + for (StorageIndex j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + last_chunk_offset = lastIdx - PacketSize; + for (; i <= last_chunk_offset; i += PacketSize) { + evaluator.evalPacket(i); + } + } + for (; i < lastIdx; ++i) { + evaluator.evalScalar(i); + } + } + + static StorageIndex alignBlockSize(StorageIndex size) { + // Align block size to packet size and account for unrolling in run above. + if (size >= 16 * PacketSize) { + return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); + } + // Aligning to 4 * PacketSize would increase block size by more than 25%. 
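+    // Round size up to the next multiple of PacketSize (a power of two);
+    // e.g. with PacketSize = 8, a size of 21 is aligned to 24.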
+ return (size + PacketSize - 1) & ~(PacketSize - 1); + } +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index StorageIndex; + + static EIGEN_STRONG_INLINE void run(const Expression& expr, + const ThreadPoolDevice& device) { + typedef TensorEvaluator Evaluator; + typedef EvalRange EvalRange; + + Evaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + if (needs_assign) { + const StorageIndex size = array_prod(evaluator.dimensions()); + device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [&evaluator](StorageIndex firstIdx, StorageIndex lastIdx) { + EvalRange::run(&evaluator, firstIdx, lastIdx); + }); + } + evaluator.cleanup(); + } +}; + +template +class TensorExecutor { + public: + typedef typename traits::Index IndexType; + typedef typename traits::Scalar Scalar; + typedef typename remove_const::type ScalarNoConst; + + static const int NumDims = traits::NumDimensions; + + typedef TensorEvaluator Evaluator; + typedef TensorBlockMapper BlockMapper; + typedef TensorExecutorTilingContext TilingContext; + + typedef internal::TensorBlockDescriptor + TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator + TensorBlockScratch; + + static EIGEN_STRONG_INLINE void run(const Expression& expr, + const ThreadPoolDevice& device) { + Evaluator evaluator(expr, device); + + const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + if (needs_assign) { + const TilingContext tiling = + internal::GetTensorExecutorTilingContext(evaluator); + + auto eval_block = [&device, &evaluator, &tiling](IndexType firstBlockIdx, + IndexType lastBlockIdx) { + TensorBlockScratch scratch(device); + + for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx; + ++block_idx) { + TensorBlockDesc desc = tiling.block_mapper.blockDescriptor(block_idx); + evaluator.evalBlock(desc, scratch); + scratch.reset(); + } + }; + + // Evaluate small expressions directly as a single block. 
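+      // A single block is evaluated inline on the calling thread; going
+      // through device.parallelFor would only add scheduling overhead.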
+ if (tiling.block_mapper.blockCount() == 1) { + TensorBlockScratch scratch(device); + TensorBlockDesc desc(0, tiling.block_mapper.blockDimensions()); + evaluator.evalBlock(desc, scratch); + } else { + device.parallelFor(tiling.block_mapper.blockCount(), tiling.cost, + eval_block); + } + } + evaluator.cleanup(); + } +}; + +template +class TensorAsyncExecutor { + public: + typedef typename Expression::Index StorageIndex; + typedef TensorEvaluator Evaluator; + + static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, + const ThreadPoolDevice& device, + DoneCallback done) { + TensorAsyncExecutorContext* const ctx = + new TensorAsyncExecutorContext(expr, device, std::move(done)); + + const auto on_eval_subexprs = [ctx, &device](bool need_assign) -> void { + if (!need_assign) { + delete ctx; + return; + } + + typedef EvalRange EvalRange; + const StorageIndex size = array_prod(ctx->evaluator.dimensions()); + device.parallelForAsync( + size, ctx->evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [ctx](StorageIndex firstIdx, StorageIndex lastIdx) { + EvalRange::run(&ctx->evaluator, firstIdx, lastIdx); + }, + [ctx]() { delete ctx; }); + }; + + ctx->evaluator.evalSubExprsIfNeededAsync(nullptr, on_eval_subexprs); + } + + private: + struct TensorAsyncExecutorContext { + TensorAsyncExecutorContext(const Expression& expr, + const ThreadPoolDevice& thread_pool, + DoneCallback done) + : evaluator(expr, thread_pool), on_done(std::move(done)) {} + + ~TensorAsyncExecutorContext() { + evaluator.cleanup(); + on_done(); + } + + Evaluator evaluator; + + private: + DoneCallback on_done; + }; +}; + +template +class TensorAsyncExecutor { + public: + typedef typename traits::Index IndexType; + typedef typename traits::Scalar Scalar; + typedef typename remove_const::type ScalarNoConst; + + static const int NumDims = traits::NumDimensions; + + typedef TensorEvaluator Evaluator; + typedef TensorBlockMapper BlockMapper; + typedef TensorExecutorTilingContext TilingContext; + + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator + TensorBlockScratch; + + static EIGEN_STRONG_INLINE void runAsync(const Expression& expr, + const ThreadPoolDevice& device, + DoneCallback done) { + + TensorAsyncExecutorContext* const ctx = + new TensorAsyncExecutorContext(expr, device, std::move(done)); + + const auto on_eval_subexprs = [ctx](bool need_assign) -> void { + if (!need_assign) { + delete ctx; + return; + } + + ctx->tiling = internal::GetTensorExecutorTilingContext< + Evaluator, BlockMapper, Vectorizable>(ctx->evaluator); + + auto eval_block = [ctx](IndexType firstBlockIdx, IndexType lastBlockIdx) { + TensorBlockScratch scratch(ctx->device); + + for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx; + ++block_idx) { + TensorBlockDesc desc = + ctx->tiling.block_mapper.blockDescriptor(block_idx); + ctx->evaluator.evalBlock(desc, scratch); + scratch.reset(); + } + }; + + // Evaluate small expressions directly as a single block. 
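+      // In the single-block case parallelForAsync (and its completion
+      // callback) is never invoked, so the context has to be released here
+      // once the block has been evaluated.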
+ if (ctx->tiling.block_mapper.blockCount() == 1) { + TensorBlockScratch scratch(ctx->device); + TensorBlockDesc desc(0, ctx->tiling.block_mapper.blockDimensions()); + ctx->evaluator.evalBlock(desc, scratch); + delete ctx; + } else { + ctx->device.parallelForAsync(ctx->tiling.block_mapper.blockCount(), + ctx->tiling.cost, eval_block, + [ctx]() { delete ctx; }); + } + }; + + ctx->evaluator.evalSubExprsIfNeededAsync(nullptr, on_eval_subexprs); + } + + private: + struct TensorAsyncExecutorContext { + TensorAsyncExecutorContext(const Expression& expr, + const ThreadPoolDevice& thread_pool, + DoneCallback done) + : device(thread_pool), + evaluator(expr, thread_pool), + on_done(std::move(done)) {} + + ~TensorAsyncExecutorContext() { + evaluator.cleanup(); + on_done(); + } + + const ThreadPoolDevice& device; + Evaluator evaluator; + TilingContext tiling; + + private: + DoneCallback on_done; + }; +}; + +#endif // EIGEN_USE_THREADS + +// GPU: the evaluation of the expression is offloaded to a GPU. +#if defined(EIGEN_USE_GPU) + +template +class TensorExecutor { + public: + typedef typename Expression::Index StorageIndex; + static void run(const Expression& expr, const GpuDevice& device); +}; + +#if defined(EIGEN_GPUCC) +template +struct EigenMetaKernelEval { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) { + for (StorageIndex i = firstIdx; i < lastIdx; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +struct EigenMetaKernelEval { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) { + const StorageIndex PacketSize = unpacket_traits::size; + const StorageIndex vectorized_size = (lastIdx / PacketSize) * PacketSize; + const StorageIndex vectorized_step_size = step_size * PacketSize; + + // Use the vector path + for (StorageIndex i = firstIdx * PacketSize; i < vectorized_size; + i += vectorized_step_size) { + eval.evalPacket(i); + } + for (StorageIndex i = vectorized_size + firstIdx; i < lastIdx; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +__global__ void +__launch_bounds__(1024) +EigenMetaKernel(Evaluator eval, StorageIndex size) { + + const StorageIndex first_index = blockIdx.x * blockDim.x + threadIdx.x; + const StorageIndex step_size = blockDim.x * gridDim.x; + + const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; + EigenMetaKernelEval::run(eval, first_index, size, step_size); +} + +/*static*/ +template +EIGEN_STRONG_INLINE void TensorExecutor::run( + const Expression& expr, const GpuDevice& device) { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + if (needs_assign) { + + const int block_size = device.maxGpuThreadsPerBlock(); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / block_size; + const StorageIndex size = array_prod(evaluator.dimensions()); + // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. 
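+    // num_blocks = clamp(ceil(size / block_size), 1, max_blocks): enough
+    // blocks to give each coefficient its own thread when possible, capped at
+    // the device's concurrent-thread capacity (the grid-stride loop in
+    // EigenMetaKernel then covers the remainder), and never zero.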
+ const int num_blocks = numext::maxi(numext::mini(max_blocks, divup(size, block_size)), 1); + + LAUNCH_GPU_KERNEL( + (EigenMetaKernel, StorageIndex>), + num_blocks, block_size, 0, device, evaluator, size); + } + evaluator.cleanup(); +} + +#endif // EIGEN_GPUCC +#endif // EIGEN_USE_GPU + +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template +struct ExecExprFunctorKernel { + typedef typename Evaluator::Index Index; + Evaluator evaluator; + const Index range; + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ExecExprFunctorKernel( + const Scratch, Evaluator evaluator_, const Index range_) + : evaluator(evaluator_), range(range_) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void operator()( + cl::sycl::nd_item<1> itemID) { + compute(itemID); + } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if::type + compute(const cl::sycl::nd_item<1>& itemID) { + Index gId = static_cast(itemID.get_global_linear_id()); + Index total_threads = itemID.get_global_range(0); + + for (Index i = gId; i < range; i += total_threads) { + evaluator.evalScalar(i); + } + } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if::type + compute(const cl::sycl::nd_item<1>& itemID) { + const Index vectorizedRange = + (range / Evaluator::PacketSize) * Evaluator::PacketSize; + Index gId = static_cast(itemID.get_global_linear_id()); + const Index step = Evaluator::PacketSize * itemID.get_global_range(0); + const Index start = Evaluator::PacketSize * gId; + for (Index i = start; i < vectorizedRange; i += step) { + evaluator.evalPacket(i); + } + gId += vectorizedRange; + for (Index i = gId; i < range; i += itemID.get_global_range(0)) { + evaluator.evalScalar(i); + } + } +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static EIGEN_STRONG_INLINE void run(const Expression& expr, + const Eigen::SyclDevice& dev) { + typedef Eigen::TensorEvaluator Evaluator; + Evaluator evaluator(expr, dev); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + Index range, GRange, tileSize; + Index total_size = ::Eigen::internal::array_prod(evaluator.dimensions()); + total_size = (total_size == 0) ? 1 : total_size; + const int PacketSize = + Eigen::PacketType::size; + Index vectorizable_threads = static_cast(total_size / PacketSize); + dev.parallel_for_setup(vectorizable_threads, tileSize, range, GRange); + range = total_size; + + dev.template nullary_kernel_launcher< + typename Evaluator::CoeffReturnType, + ExecExprFunctorKernel >( + evaluator, + cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), + cl::sycl::range<1>(tileSize)), + Index(1), range); + } + evaluator.cleanup(); + } +}; + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h new file mode 100644 index 0000000..c9bccfc --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -0,0 +1,388 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H + +namespace Eigen { + +/** \class TensorExpr + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor expression classes. + * + * The TensorCwiseNullaryOp class applies a nullary operators to an expression. + * This is typically used to generate constants. + * + * The TensorCwiseUnaryOp class represents an expression where a unary operator + * (e.g. cwiseSqrt) is applied to an expression. + * + * The TensorCwiseBinaryOp class represents an expression where a binary + * operator (e.g. addition) is applied to a lhs and a rhs expression. + * + */ +namespace internal { +template +struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; + enum { + Flags = 0 + }; +}; + +} // end namespace internal + + + +template +class TensorCwiseNullaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef TensorCwiseNullaryOp Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const NullaryOp& functor() const { return m_functor; } + + protected: + typename XprType::Nested m_xpr; + const NullaryOp m_functor; +}; + + + +namespace internal { +template +struct traits > + : traits +{ + // TODO(phli): Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. + typedef typename result_of::type Scalar; + typedef traits XprTraits; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename TypeConversion::type + PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseUnaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. 
+ typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const UnaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs + // are different. + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. + typedef typename result_of< + BinaryOp(typename LhsXprType::Scalar, + typename RhsXprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type< + typename traits::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type< + typename traits::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename TypeConversion::val, + typename traits::PointerType, + typename traits::PointerType>::type + >::type + PointerType; + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseBinaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const BinaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const BinaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. 
+ typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename TypeConversion::val, + typename traits::PointerType, + typename traits::PointerType>::type + >::type + PointerType; + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg2XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename IfXprType::Nested IfNested; + typedef typename ThenXprType::Nested ThenNested; + typedef typename ElseXprType::Nested ElseNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename conditional::val, + typename traits::PointerType, + typename traits::PointerType>::type PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSelectOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSelectOp type; +}; + +} // end namespace internal + + +template +class TensorSelectOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename 
internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC + TensorSelectOp(const IfXprType& a_condition, + const ThenXprType& a_then, + const ElseXprType& a_else) + : m_condition(a_condition), m_then(a_then), m_else(a_else) + { } + + EIGEN_DEVICE_FUNC + const IfXprType& ifExpression() const { return m_condition; } + + EIGEN_DEVICE_FUNC + const ThenXprType& thenExpression() const { return m_then; } + + EIGEN_DEVICE_FUNC + const ElseXprType& elseExpression() const { return m_else; } + + protected: + typename IfXprType::Nested m_condition; + typename ThenXprType::Nested m_then; + typename ElseXprType::Nested m_else; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h new file mode 100644 index 0000000..4a1a068 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -0,0 +1,669 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H +#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H + +namespace Eigen { + +/** \class TensorFFT + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor FFT class. + * + * TODO: + * Vectorize the Cooley Tukey and the Bluestein algorithm + * Add support for multithreaded evaluation + * Improve the performance on GPU + */ + +template struct MakeComplex { + template + EIGEN_DEVICE_FUNC + T operator() (const T& val) const { return val; } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const T& val) const { return std::complex(val, 0); } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const std::complex& val) const { return val; } +}; + +template struct PartOf { + template T operator() (const T& val) const { return val; } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.real(); } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.imag(); } +}; + +namespace internal { +template +struct traits > : public traits { + typedef traits XprTraits; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename XprTraits::Scalar InputScalar; + typedef typename conditional::type OutputScalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename traits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> { + typedef const TensorFFTOp& type; +}; + +template +struct nested, 1, typename eval >::type> { + typedef TensorFFTOp type; +}; + +} // end namespace internal + +template +class TensorFFTOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename 
Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) + : m_xpr(expr), m_fft(fft) {} + + EIGEN_DEVICE_FUNC + const FFT& fft() const { return m_fft; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& expression() const { + return m_xpr; + } + + protected: + typename XprType::Nested m_xpr; + const FFT m_fft; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + typedef TensorFFTOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef internal::traits XprTraits; + typedef typename XprTraits::Scalar InputScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = true, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + m_size = m_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (data) { + evalToBuf(data); + return false; + } else { + m_data = (EvaluatorPointerType)m_device.get((CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size))); + evalToBuf(m_data); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + if (m_data) { + m_device.deallocate(m_data); + m_data = NULL; + } + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType + packet(Index 
index) const { + return internal::ploadt(m_data + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_data.bind(cgh); + } +#endif + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) { + const bool write_to_out = internal::is_same::value; + ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); + + for (Index i = 0; i < m_size; ++i) { + buf[i] = MakeComplex::value>()(m_impl.coeff(i)); + } + + for (size_t i = 0; i < m_fft.size(); ++i) { + Index dim = m_fft[i]; + eigen_assert(dim >= 0 && dim < NumDims); + Index line_len = m_dimensions[dim]; + eigen_assert(line_len >= 1); + ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); + const bool is_power_of_two = isPowerOfTwo(line_len); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + + ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); + if (!is_power_of_two) { + // Compute twiddle factors + // t_n = exp(sqrt(-1) * pi * n^2 / line_len) + // for n = 0, 1,..., line_len-1. + // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 + + // The recurrence is correct in exact arithmetic, but causes + // numerical issues for large transforms, especially in + // single-precision floating point. + // + // pos_j_base_powered[0] = ComplexScalar(1, 0); + // if (line_len > 1) { + // const ComplexScalar pos_j_base = ComplexScalar( + // numext::cos(M_PI / line_len), numext::sin(M_PI / line_len)); + // pos_j_base_powered[1] = pos_j_base; + // if (line_len > 2) { + // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; + // for (int i = 2; i < line_len + 1; ++i) { + // pos_j_base_powered[i] = pos_j_base_powered[i - 1] * + // pos_j_base_powered[i - 1] / + // pos_j_base_powered[i - 2] * + // pos_j_base_sq; + // } + // } + // } + // TODO(rmlarsen): Find a way to use Eigen's vectorized sin + // and cosine functions here. 
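+        // Each twiddle factor exp(i * pi * j^2 / line_len) is computed
+        // directly from its definition instead of via the recurrence above:
+        // slower, but numerically stable for long lines.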
+ for (int j = 0; j < line_len + 1; ++j) { + double arg = ((EIGEN_PI * j) * j) / line_len; + std::complex tmp(numext::cos(arg), numext::sin(arg)); + pos_j_base_powered[j] = static_cast(tmp); + } + } + + for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { + const Index base_offset = getBaseOffsetFromIndex(partial_index, dim); + + // get data into line_buf + const Index stride = m_strides[dim]; + if (stride == 1) { + m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + for (int j = 0; j < line_len; ++j, offset += stride) { + line_buf[j] = buf[offset]; + } + } + + // process the line + if (is_power_of_two) { + processDataLineCooleyTukey(line_buf, line_len, log_len); + } + else { + processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); + } + + // write back + if (FFTDir == FFT_FORWARD && stride == 1) { + m_device.memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0); + for (int j = 0; j < line_len; ++j, offset += stride) { + buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor; + } + } + } + m_device.deallocate(line_buf); + if (!is_power_of_two) { + m_device.deallocate(a); + m_device.deallocate(b); + m_device.deallocate(pos_j_base_powered); + } + } + + if(!write_to_out) { + for (Index i = 0; i < m_size; ++i) { + data[i] = PartOf()(buf[i]); + } + m_device.deallocate(buf); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { + eigen_assert(x > 0); + return !(x & (x - 1)); + } + + // The composite number for padding, used in Bluestein's FFT algorithm + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; + while (i < 2 * n - 1) i *= 2; + return i; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; + while (m >>= 1) log2m++; + return log2m; + } + + // Call Cooley Tukey algorithm directly, data length must be power of 2 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { + eigen_assert(isPowerOfTwo(line_len)); + scramble_FFT(line_buf, line_len); + compute_1D_Butterfly(line_buf, line_len, log_len); + } + + // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; + ComplexScalar* data = line_buf; + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + a[i] = data[i] * numext::conj(pos_j_base_powered[i]); + } + else { + a[i] = data[i] * pos_j_base_powered[i]; + } + } + for (Index i = n; i < m; ++i) { + a[i] = ComplexScalar(0, 0); + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[i]; + } + else { + b[i] = numext::conj(pos_j_base_powered[i]); + } + } + for (Index i = n; i < m - n; ++i) { + b[i] = ComplexScalar(0, 0); + } + for (Index i = m - n; i < m; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[m-i]; + } + else { + b[i] = numext::conj(pos_j_base_powered[m-i]); + } + } + + scramble_FFT(a, m); + 
compute_1D_Butterfly(a, m, log_len); + + scramble_FFT(b, m); + compute_1D_Butterfly(b, m, log_len); + + for (Index i = 0; i < m; ++i) { + a[i] *= b[i]; + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + //Do the scaling after ifft + for (Index i = 0; i < m; ++i) { + a[i] /= m; + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + data[i] = a[i] * numext::conj(pos_j_base_powered[i]); + } + else { + data[i] = a[i] * pos_j_base_powered[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { + eigen_assert(isPowerOfTwo(n)); + Index j = 1; + for (Index i = 1; i < n; ++i){ + if (j > i) { + std::swap(data[j-1], data[i-1]); + } + Index m = n >> 1; + while (m >= 2 && j > m) { + j -= m; + m >>= 1; + } + j += m; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) { + ComplexScalar tmp = data[1]; + data[1] = data[0] - data[1]; + data[0] += tmp; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) { + ComplexScalar tmp[4]; + tmp[0] = data[0] + data[1]; + tmp[1] = data[0] - data[1]; + tmp[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); + } else { + tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); + } + data[0] = tmp[0] + tmp[2]; + data[1] = tmp[1] + tmp[3]; + data[2] = tmp[0] - tmp[2]; + data[3] = tmp[1] - tmp[3]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) { + ComplexScalar tmp_1[8]; + ComplexScalar tmp_2[8]; + + tmp_1[0] = data[0] + data[1]; + tmp_1[1] = data[0] - data[1]; + tmp_1[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); + } else { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); + } + tmp_1[4] = data[4] + data[5]; + tmp_1[5] = data[4] - data[5]; + tmp_1[6] = data[6] + data[7]; + if (Dir == FFT_FORWARD) { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); + } else { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); + } + tmp_2[0] = tmp_1[0] + tmp_1[2]; + tmp_2[1] = tmp_1[1] + tmp_1[3]; + tmp_2[2] = tmp_1[0] - tmp_1[2]; + tmp_2[3] = tmp_1[1] - tmp_1[3]; + tmp_2[4] = tmp_1[4] + tmp_1[6]; +// SQRT2DIV2 = sqrt(2)/2 +#define SQRT2DIV2 0.7071067811865476 + if (Dir == FFT_FORWARD) { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); + } else { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); + } + data[0] = tmp_2[0] + tmp_2[4]; + data[1] = tmp_2[1] + tmp_2[5]; + data[2] = tmp_2[2] + tmp_2[6]; + data[3] = tmp_2[3] + tmp_2[7]; + data[4] = tmp_2[0] - tmp_2[4]; + data[5] = tmp_2[1] - tmp_2[5]; + data[6] = tmp_2[2] - tmp_2[6]; + data[7] = tmp_2[3] - tmp_2[7]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge( + ComplexScalar* data, Index n, Index n_power_of_2) { + // Original code: + // RealScalar wtemp = std::sin(M_PI/n); + // RealScalar wpi = -std::sin(2 * M_PI/n); + const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; + const RealScalar wpi = (Dir == FFT_FORWARD) + ? 
m_minus_sin_2_PI_div_n_LUT[n_power_of_2] + : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + + const ComplexScalar wp(wtemp, wpi); + const ComplexScalar wp_one = wp + ComplexScalar(1, 0); + const ComplexScalar wp_one_2 = wp_one * wp_one; + const ComplexScalar wp_one_3 = wp_one_2 * wp_one; + const ComplexScalar wp_one_4 = wp_one_3 * wp_one; + const Index n2 = n / 2; + ComplexScalar w(1.0, 0.0); + for (Index i = 0; i < n2; i += 4) { + ComplexScalar temp0(data[i + n2] * w); + ComplexScalar temp1(data[i + 1 + n2] * w * wp_one); + ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2); + ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3); + w = w * wp_one_4; + + data[i + n2] = data[i] - temp0; + data[i] += temp0; + + data[i + 1 + n2] = data[i + 1] - temp1; + data[i + 1] += temp1; + + data[i + 2 + n2] = data[i + 2] - temp2; + data[i + 2] += temp2; + + data[i + 3 + n2] = data[i + 3] - temp3; + data[i + 3] += temp3; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly( + ComplexScalar* data, Index n, Index n_power_of_2) { + eigen_assert(isPowerOfTwo(n)); + if (n > 8) { + compute_1D_Butterfly(data, n / 2, n_power_of_2 - 1); + compute_1D_Butterfly(data + n / 2, n / 2, n_power_of_2 - 1); + butterfly_1D_merge(data, n, n_power_of_2); + } else if (n == 8) { + butterfly_8(data); + } else if (n == 4) { + butterfly_4(data); + } else if (n == 2) { + butterfly_2(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { + Index result = 0; + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > omitted_dim; --i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + else { + for (Index i = 0; i < omitted_dim; ++i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + // Value of index_coords[omitted_dim] is not determined to this step + return result; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { + Index result = base + offset * m_strides[omitted_dim] ; + return result; + } + + protected: + Index m_size; + const FFT EIGEN_DEVICE_REF m_fft; + Dimensions m_dimensions; + array m_strides; + TensorEvaluator m_impl; + EvaluatorPointerType m_data; + const Device EIGEN_DEVICE_REF m_device; + + // This will support a maximum FFT size of 2^32 for each dimension + // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; + const RealScalar m_sin_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(-2), + RealScalar(-0.999999999999999), + RealScalar(-0.292893218813453), + RealScalar(-0.0761204674887130), + RealScalar(-0.0192147195967696), + RealScalar(-0.00481527332780311), + RealScalar(-0.00120454379482761), + RealScalar(-3.01181303795779e-04), + RealScalar(-7.52981608554592e-05), + RealScalar(-1.88247173988574e-05), + RealScalar(-4.70619042382852e-06), + RealScalar(-1.17654829809007e-06), + RealScalar(-2.94137117780840e-07), + RealScalar(-7.35342821488550e-08), + RealScalar(-1.83835707061916e-08), + RealScalar(-4.59589268710903e-09), + RealScalar(-1.14897317243732e-09), + RealScalar(-2.87243293150586e-10), + RealScalar( -7.18108232902250e-11), + RealScalar(-1.79527058227174e-11), 
+ RealScalar(-4.48817645568941e-12), + RealScalar(-1.12204411392298e-12), + RealScalar(-2.80511028480785e-13), + RealScalar(-7.01277571201985e-14), + RealScalar(-1.75319392800498e-14), + RealScalar(-4.38298482001247e-15), + RealScalar(-1.09574620500312e-15), + RealScalar(-2.73936551250781e-16), + RealScalar(-6.84841378126949e-17), + RealScalar(-1.71210344531737e-17), + RealScalar(-4.28025861329343e-18) + }; + + // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); + const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(0.0), + RealScalar(-1.00000000000000e+00), + RealScalar(-7.07106781186547e-01), + RealScalar(-3.82683432365090e-01), + RealScalar(-1.95090322016128e-01), + RealScalar(-9.80171403295606e-02), + RealScalar(-4.90676743274180e-02), + RealScalar(-2.45412285229123e-02), + RealScalar(-1.22715382857199e-02), + RealScalar(-6.13588464915448e-03), + RealScalar(-3.06795676296598e-03), + RealScalar(-1.53398018628477e-03), + RealScalar(-7.66990318742704e-04), + RealScalar(-3.83495187571396e-04), + RealScalar(-1.91747597310703e-04), + RealScalar(-9.58737990959773e-05), + RealScalar(-4.79368996030669e-05), + RealScalar(-2.39684498084182e-05), + RealScalar(-1.19842249050697e-05), + RealScalar(-5.99211245264243e-06), + RealScalar(-2.99605622633466e-06), + RealScalar(-1.49802811316901e-06), + RealScalar(-7.49014056584716e-07), + RealScalar(-3.74507028292384e-07), + RealScalar(-1.87253514146195e-07), + RealScalar(-9.36267570730981e-08), + RealScalar(-4.68133785365491e-08), + RealScalar(-2.34066892682746e-08), + RealScalar(-1.17033446341373e-08), + RealScalar(-5.85167231706864e-09), + RealScalar(-2.92583615853432e-09) + }; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h new file mode 100644 index 0000000..ca39bb8 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -0,0 +1,379 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H +#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H + +namespace Eigen { + +/** \class TensorFixedSize + * \ingroup CXX11_Tensor_Module + * + * \brief The fixed sized version of the tensor class. + * + * The fixed sized equivalent of + * Eigen::Tensor t(3, 5, 7); + * is + * Eigen::TensorFixedSize> t; + */ + +template +class TensorFixedSize : public TensorBase > +{ + public: + typedef TensorFixedSize Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + static const int Options = Options_; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + PreferBlockAccess = false, + Layout = Options_ & RowMajor ? 
RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + typedef Dimensions_ Dimensions; + static const std::size_t NumIndices = Dimensions::count; + + protected: + TensorStorage m_storage; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
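+      // E.g. the rank-3 tensor from the class comment above must be indexed
+      // with exactly three indices: t(i, j, k).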
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } +#endif + + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + eigen_assert(checkIndexRange(indices)); + return coeff(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + eigen_assert(checkIndexRange(indices)); + return coeffRef(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) + : m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other) + : m_storage(other.m_storage) + { + } +#endif + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + // FIXME: check that the dimensions of other match the 
dimensions of *this. + // Unfortunately this isn't possible yet when the rhs is an expression. + EIGEN_TENSOR_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(TensorFixedSize) + + + protected: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE bool checkIndexRange(const array& /*indices*/) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return true; + // check whether the indices are all >= 0 + /* array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions());*/ + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h new file mode 100644 index 0000000..e800ded --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -0,0 +1,237 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. 
+ typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorForcedEvalOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorForcedEvalOp type; +}; + +} // end namespace internal + + + +template +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +namespace internal { +template +struct non_integral_type_placement_new{ + template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) { + // Initialize non-trivially constructible types. + if (!internal::is_arithmetic::value) { + for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType(); + } +} +}; + +// SYCL does not support non-integral types +// having new (m_buffer + i) CoeffReturnType() causes the following compiler error for SYCL Devices +// no matching function for call to 'operator new' +template +struct non_integral_type_placement_new { + template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) { +} +}; +} // end namespace internal + +template +struct TensorEvaluator, Device> +{ + typedef const typename internal::remove_all::type ArgType; + typedef TensorForcedEvalOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef typename Eigen::internal::traits::PointerType TensorPointerType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = true, + PacketAccess = (PacketType::size > 1), + BlockAccess = internal::is_arithmetic::value, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + RawAccess = true + }; + + static const int NumDims = internal::traits::NumDimensions; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_op(op.expression()), + m_device(device), m_buffer(NULL) + { } + + 
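  // (Editorial aside, not part of the upstream Eigen header.)
  // What follows is the core of forced evaluation: evalSubExprsIfNeeded()
  // allocates a temporary buffer sized to the nested expression, wraps the
  // expression in a TensorEvalToOp that writes into that buffer, and runs it
  // through TensorExecutor. After that, coeff()/packet() are plain reads from
  // m_buffer, which is why this evaluator can advertise RawAccess = true.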
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType))); + + internal::non_integral_type_placement_new()(numValues, m_buffer); + + typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; + EvalTo evalToTmp(m_device.get(m_buffer), m_op); + + internal::TensorExecutor< + const EvalTo, typename internal::remove_const::type, + /*Vectorizable=*/internal::IsVectorizable::value, + /*Tiling=*/internal::IsTileable::value>:: + run(evalToTmp, m_device); + + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp( + numValues * sizeof(CoeffReturnType))); + typedef TensorEvalToOp::type> + EvalTo; + EvalTo evalToTmp(m_device.get(m_buffer), m_op); + + auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); }, + std::move(done)); + internal::TensorAsyncExecutor< + const EvalTo, typename internal::remove_const::type, + decltype(on_done), + /*Vectorizable=*/internal::IsVectorizable::value, + /*Tiling=*/internal::IsTileable::value>:: + runAsync(evalToTmp, m_device, std::move(on_done)); + } +#endif + + EIGEN_STRONG_INLINE void cleanup() { + m_device.deallocate_temp(m_buffer); + m_buffer = NULL; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return internal::TensorBlockResourceRequirements::any(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + assert(m_buffer != NULL); + return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + EvaluatorPointerType data() const { return m_buffer; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_buffer.bind(cgh); + m_impl.bind(cgh); + } +#endif + private: + TensorEvaluator m_impl; + const ArgType m_op; + const Device EIGEN_DEVICE_REF m_device; + EvaluatorPointerType m_buffer; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h new file mode 100644 index 0000000..246ebe4 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -0,0 +1,191 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H + +namespace Eigen { + +// MakePointer class is used as a container of the address space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. +// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. +template struct MakePointer { + typedef T* Type; + typedef const T* ConstType; +}; + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) { + return const_cast(data); +} + +// The StorageMemory class is a container of the device specific pointer +// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression +// is a device-agnostic type and need MakePointer class for type conversion, +// the TensorEvaluator class can be specialized for a device, hence it is possible +// to construct different types of temproray storage memory in TensorEvaluator +// for different devices by specializing the following StorageMemory class. +template struct StorageMemory: MakePointer {}; + +namespace internal{ +template struct Pointer_type_promotion { + static const bool val=false; +}; +template struct Pointer_type_promotion { + static const bool val = true; +}; +template struct TypeConversion { + typedef A* type; +}; +} + + +template class MakePointer_ = MakePointer> class TensorMap; +template class Tensor; +template class TensorFixedSize; +template class TensorRef; +template class TensorBase; + +template class TensorCwiseNullaryOp; +template class TensorCwiseUnaryOp; +template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; +template class TensorSelectOp; +template class MakePointer_ = MakePointer > class TensorReductionOp; +template class TensorIndexTupleOp; +template class TensorTupleReducerOp; +template class TensorConcatenationOp; +template class TensorContractionOp; +template class TensorConversionOp; +template class TensorConvolutionOp; +template class TensorFFTOp; +template class TensorPatchOp; +template class TensorImagePatchOp; +template class TensorVolumePatchOp; +template class TensorBroadcastingOp; +template class TensorChippingOp; +template class TensorReshapingOp; +template class TensorLayoutSwapOp; +template class TensorSlicingOp; +template class TensorReverseOp; +template class TensorPaddingOp; +template class TensorShufflingOp; +template class TensorStridingOp; +template class TensorStridingSlicingOp; +template class TensorInflationOp; +template class TensorGeneratorOp; +template class TensorAssignOp; +template class TensorScanOp; +template class TensorTraceOp; + +template class TensorCustomUnaryOp; +template class TensorCustomBinaryOp; + +template class MakePointer_ = MakePointer> class TensorEvalToOp; +template class TensorForcedEvalOp; + +template class TensorDevice; +template class TensorAsyncDevice; +template struct TensorEvaluator; + +struct NoOpOutputKernel; + +struct DefaultDevice; +struct ThreadPoolDevice; +struct GpuDevice; +struct SyclDevice; + +#ifdef EIGEN_USE_SYCL + +template struct MakeSYCLPointer { + typedef 
Eigen::TensorSycl::internal::RangeAccess Type; +}; + +template +EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess& +constCast(const Eigen::TensorSycl::internal::RangeAccess& data) { + return data; +} + +template +struct StorageMemory : MakeSYCLPointer {}; +template +struct StorageMemory : StorageMemory {}; + +namespace TensorSycl { +namespace internal{ +template class GenericNondeterministicReducer; +} +} +#endif + + +enum FFTResultType { + RealPart = 0, + ImagPart = 1, + BothParts = 2 +}; + +enum FFTDirection { + FFT_FORWARD = 0, + FFT_REVERSE = 1 +}; + + +namespace internal { + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess; +}; + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess && + TensorEvaluator::IsAligned; +}; + +// Tiled evaluation strategy. +enum TiledEvaluation { + Off = 0, // tiled evaluation is not supported + On = 1, // still work in progress (see TensorBlock.h) +}; + +template +struct IsTileable { + // Check that block evaluation is supported and it's a preferred option (at + // least one sub-expression has much faster block evaluation, e.g. + // broadcasting). + static const bool BlockAccess = + TensorEvaluator::BlockAccess && + TensorEvaluator::PreferBlockAccess; + + static const TiledEvaluation value = + BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off; +}; + +template ::value, + TiledEvaluation Tiling = IsTileable::value> +class TensorExecutor; + +template ::value, + TiledEvaluation Tiling = IsTileable::value> +class TensorAsyncExecutor; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h new file mode 100644 index 0000000..d963032 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -0,0 +1,488 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H + +namespace Eigen { +namespace internal { + + +/** \internal + * \brief Template functor to compute the modulo between an array and a scalar. + */ +template +struct scalar_mod_op { + EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a % m_divisor; } + const Scalar m_divisor; +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + + +/** \internal + * \brief Template functor to compute the modulo between 2 arrays. 
+ */ +template +struct scalar_mod2_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + +template +struct scalar_fmod_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar + operator()(const Scalar& a, const Scalar& b) const { + return numext::fmod(a, b); + } +}; +template +struct functor_traits > { + enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell. + PacketAccess = false }; +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + +// Standard reduction functors +template struct SumReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = padd(*accum, p); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + return sum_op(saccum, predux(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd, + IsStateful = false, + IsExactlyAssociative = NumTraits::IsInteger + }; +}; + +template struct MeanReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + MeanReducer() : scalarCount_(0), packetCount_(0) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + scalarCount_++; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { + (*accum) = padd(*accum, p); + packetCount_++; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + internal::scalar_quotient_op quotient_op; + return quotient_op(accum, T(scalarCount_)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return pdiv(vaccum, pset1(T(packetCount_))); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + internal::scalar_quotient_op quotient_op; + return quotient_op( + sum_op(saccum, predux(vaccum)), + T(scalarCount_ + packetCount_ * unpacket_traits::size)); + } + + protected: + DenseIndex scalarCount_; + DenseIndex packetCount_; +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd && + PacketType::HasDiv && 
!NumTraits::IsInteger, + IsStateful = true, + IsExactlyAssociative = NumTraits::IsInteger + }; +}; + + +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::lowest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return -Eigen::NumTraits::infinity(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::highest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::infinity(); + } +}; + + +template struct MaxReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + scalar_max_op op; + *accum = op(t, *accum); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + scalar_max_op op; + (*accum) = op.packetOp(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + scalar_max_op op; + return op(saccum, op.predux(vaccum)); + } +}; + +template + struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMax, + IsStateful = false, + IsExactlyAssociative = (NaNPropagation!=PropagateFast) + }; +}; + +template struct MinReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + scalar_min_op op; + *accum = op(t, *accum); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + scalar_min_op op; + (*accum) = op.packetOp(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + scalar_min_op op; + return op(saccum, op.predux(vaccum)); + } +}; + +template + struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMin, + IsStateful = false, + IsExactlyAssociative = (NaNPropagation!=PropagateFast) + }; +}; + +template struct ProdReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_product_op prod_op; + (*accum) = prod_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmul(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(1); + } + template + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_product_op prod_op; + return prod_op(saccum, predux_mul(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::HasMul, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + + +struct AndReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum && t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + + +struct OrReducer { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum || t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return false; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + +// Argmin/Argmax reducers. Returns the first occurrence if multiple locations +// contain the same min/max value. +template struct ArgMaxTupleReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t.second < accum->second) { + return; + } else if (t.second > accum->second || accum->first > t.first ) { + *accum = t; + } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::lowest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + + +template struct ArgMinTupleReducer +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { + if (t.second > accum->second) { + return; + } else if (t.second < accum->second || accum->first > t.first) { + *accum = t; + } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::highest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false, + IsStateful = false, + IsExactlyAssociative = true + }; +}; + + +template +class GaussianGenerator { + public: + static const bool PacketAccess = false; + + EIGEN_DEVICE_FUNC GaussianGenerator(const array& means, + const array& std_devs) + : m_means(means) + { + EIGEN_UNROLL_LOOP + for (size_t i = 0; i < NumDims; ++i) { + m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; + } + } + + EIGEN_DEVICE_FUNC T operator()(const array& coordinates) const { + T tmp = T(0); + EIGEN_UNROLL_LOOP + for (size_t i = 0; i < NumDims; ++i) { + T offset = coordinates[i] - m_means[i]; + tmp += offset * offset / 
m_two_sigmas[i]; + } + return numext::exp(-tmp); + } + + private: + array m_means; + array m_two_sigmas; +}; + +template +struct functor_traits > { + enum { + Cost = NumDims * (2 * NumTraits::AddCost + NumTraits::MulCost + + functor_traits >::Cost) + + functor_traits >::Cost, + PacketAccess = GaussianGenerator::PacketAccess + }; +}; + +template +struct scalar_clamp_op { + EIGEN_DEVICE_FUNC inline scalar_clamp_op(const Scalar& _min, const Scalar& _max) : m_min(_min), m_max(_max) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& x) const { + return numext::mini(numext::maxi(x, m_min), m_max); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet + packetOp(const Packet& x) const { + return internal::pmin(internal::pmax(x, pset1(m_min)), pset1(m_max)); + } + const Scalar m_min; + const Scalar m_max; +}; +template +struct functor_traits > +{ enum { Cost = 2 * NumTraits::AddCost, PacketAccess = (packet_traits::HasMin && packet_traits::HasMax)}; }; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h new file mode 100644 index 0000000..174bf06 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -0,0 +1,302 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H + +namespace Eigen { + +/** \class TensorGeneratorOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor generator class. 
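  *
  * (Editorial aside, not part of the upstream header.) A generator expression
  * computes each coefficient by calling a user-supplied functor with that
  * coefficient's multi-dimensional coordinates; the GaussianGenerator defined
  * in TensorFunctors.h above is one such functor. A typical use, assuming a
  * functor gen exposing
  * Scalar operator()(const array<Index, NumDims>& coords) const,
  * would be an expression of the form t.generate(gen).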
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorGeneratorOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorGeneratorOp type; +}; + +} // end namespace internal + + + +template +class TensorGeneratorOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) + : m_xpr(expr), m_generator(generator) {} + + EIGEN_DEVICE_FUNC + const Generator& generator() const { return m_generator; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Generator m_generator; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorGeneratorOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = true, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + typedef internal::TensorIntDivisor IndexDivisor; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_device(device), m_generator(op.generator()) + { + TensorEvaluator argImpl(op.expression(), device); + m_dimensions = argImpl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + EIGEN_UNROLL_LOOP + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]); + } + } else { + m_strides[NumDims - 1] = 1; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& 
dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + array coords; + extract_coordinates(index, coords); + return m_generator(coords); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = PacketType::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + const size_t target_size = m_device.firstLevelCacheSize(); + // TODO(ezhulenev): Generator should have a cost. + return internal::TensorBlockResourceRequirements::skewed( + target_size); + } + + struct BlockIteratorState { + Index stride; + Index span; + Index size; + Index count; + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + static const bool is_col_major = + static_cast(Layout) == static_cast(ColMajor); + + // Compute spatial coordinates for the first block element. + array coords; + extract_coordinates(desc.offset(), coords); + array initial_coords = coords; + + // Offset in the output block buffer. + Index offset = 0; + + // Initialize output block iterator state. Dimension in this array are + // always in inner_most -> outer_most order (col major layout). + array it; + for (int i = 0; i < NumDims; ++i) { + const int dim = is_col_major ? i : NumDims - 1 - i; + it[i].size = desc.dimension(dim); + it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride); + it[i].span = it[i].stride * (it[i].size - 1); + it[i].count = 0; + } + eigen_assert(it[0].stride == 1); + + // Prepare storage for the materialized generator result. + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage(desc, scratch); + + CoeffReturnType* block_buffer = block_storage.data(); + + static const int packet_size = PacketType::size; + + static const int inner_dim = is_col_major ? 0 : NumDims - 1; + const Index inner_dim_size = it[0].size; + const Index inner_dim_vectorized = inner_dim_size - packet_size; + + while (it[NumDims - 1].count < it[NumDims - 1].size) { + Index i = 0; + // Generate data for the vectorized part of the inner-most dimension. + for (; i <= inner_dim_vectorized; i += packet_size) { + for (Index j = 0; j < packet_size; ++j) { + array j_coords = coords; // Break loop dependence. + j_coords[inner_dim] += j; + *(block_buffer + offset + i + j) = m_generator(j_coords); + } + coords[inner_dim] += packet_size; + } + // Finalize non-vectorized part of the inner-most dimension. + for (; i < inner_dim_size; ++i) { + *(block_buffer + offset + i) = m_generator(coords); + coords[inner_dim]++; + } + coords[inner_dim] = initial_coords[inner_dim]; + + // For the 1d tensor we need to generate only one inner-most dimension. + if (NumDims == 1) break; + + // Update offset. + for (i = 1; i < NumDims; ++i) { + if (++it[i].count < it[i].size) { + offset += it[i].stride; + coords[is_col_major ? 
i : NumDims - 1 - i]++; + break; + } + if (i != NumDims - 1) it[i].count = 0; + coords[is_col_major ? i : NumDims - 1 - i] = + initial_coords[is_col_major ? i : NumDims - 1 - i]; + offset -= it[i].span; + } + } + + return block_storage.AsTensorMaterializedBlock(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool) const { + // TODO(rmlarsen): This is just a placeholder. Define interface to make + // generators return their cost. + return TensorOpCost(0, 0, TensorOpCost::AddCost() + + TensorOpCost::MulCost()); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler&) const {} +#endif + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void extract_coordinates(Index index, array& coords) const { + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_fast_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_fast_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[NumDims-1] = index; + } + } + + const Device EIGEN_DEVICE_REF m_device; + Dimensions m_dimensions; + array m_strides; + array m_fast_strides; + Generator m_generator; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 0000000..665b861 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. + * + * This function computes the regularized incomplete beta function (integral). + * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h new file mode 100644 index 0000000..cb53ce2 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2018 Deven Desai +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H) +#define EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H + +// Note that we are using EIGEN_USE_HIP here instead of EIGEN_HIPCC...this is by design +// There is code in the Tensorflow codebase that will define EIGEN_USE_GPU, but +// for some reason gets sent to the gcc/host compiler instead of the gpu/nvcc/hipcc compiler +// When compiling such files, gcc will end up trying to pick up the CUDA headers by +// default (see the code within "unsupported/Eigen/CXX11/Tensor" that is guarded by EIGEN_USE_GPU) +// This will obviously not work when trying to compile tensorflow on a system with no CUDA +// To work around this issue for HIP systems (and leave the default behaviour intact), the +// HIP tensorflow build defines EIGEN_USE_HIP when compiling all source files, and +// "unsupported/Eigen/CXX11/Tensor" has been updated to use HIP header when EIGEN_USE_HIP is +// defined. In continuation of that requirement, the guard here needs to be EIGEN_USE_HIP as well + +#if defined(EIGEN_USE_HIP) + +#define gpuStream_t hipStream_t +#define gpuDeviceProp_t hipDeviceProp_t +#define gpuError_t hipError_t +#define gpuSuccess hipSuccess +#define gpuErrorNotReady hipErrorNotReady +#define gpuGetDeviceCount hipGetDeviceCount +#define gpuGetLastError hipGetLastError +#define gpuPeekAtLastError hipPeekAtLastError +#define gpuGetErrorName hipGetErrorName +#define gpuGetErrorString hipGetErrorString +#define gpuGetDeviceProperties hipGetDeviceProperties +#define gpuStreamDefault hipStreamDefault +#define gpuGetDevice hipGetDevice +#define gpuSetDevice hipSetDevice +#define gpuMalloc hipMalloc +#define gpuFree hipFree +#define gpuMemsetAsync hipMemsetAsync +#define gpuMemcpyAsync hipMemcpyAsync +#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost +#define gpuMemcpyHostToDevice hipMemcpyHostToDevice +#define gpuStreamQuery hipStreamQuery +#define gpuSharedMemConfig hipSharedMemConfig +#define gpuDeviceSetSharedMemConfig hipDeviceSetSharedMemConfig +#define gpuStreamSynchronize hipStreamSynchronize +#define gpuDeviceSynchronize hipDeviceSynchronize +#define gpuMemcpy hipMemcpy + +#else + +#define gpuStream_t cudaStream_t +#define gpuDeviceProp_t cudaDeviceProp +#define gpuError_t cudaError_t +#define gpuSuccess cudaSuccess +#define gpuErrorNotReady cudaErrorNotReady +#define gpuGetDeviceCount cudaGetDeviceCount +#define gpuGetLastError cudaGetLastError +#define gpuPeekAtLastError cudaPeekAtLastError +#define gpuGetErrorName cudaGetErrorName +#define gpuGetErrorString cudaGetErrorString +#define gpuGetDeviceProperties cudaGetDeviceProperties +#define gpuStreamDefault cudaStreamDefault +#define gpuGetDevice cudaGetDevice +#define gpuSetDevice cudaSetDevice +#define gpuMalloc cudaMalloc +#define gpuFree cudaFree +#define gpuMemsetAsync cudaMemsetAsync +#define gpuMemcpyAsync cudaMemcpyAsync +#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice +#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost +#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice +#define gpuStreamQuery cudaStreamQuery +#define gpuSharedMemConfig cudaSharedMemConfig +#define gpuDeviceSetSharedMemConfig cudaDeviceSetSharedMemConfig +#define gpuStreamSynchronize cudaStreamSynchronize +#define gpuDeviceSynchronize cudaDeviceSynchronize +#define gpuMemcpy cudaMemcpy + +#endif + +// gpu_assert can be 
overridden +#ifndef gpu_assert + +#if defined(EIGEN_HIP_DEVICE_COMPILE) +// HIPCC do not support the use of assert on the GPU side. +#define gpu_assert(COND) +#else +#define gpu_assert(COND) assert(COND) +#endif + +#endif // gpu_assert + +#endif // EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h new file mode 100644 index 0000000..1d142f2 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h @@ -0,0 +1,44 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2018 Deven Desai +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H) + +#ifndef EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES + +#undef gpuStream_t +#undef gpuDeviceProp_t +#undef gpuError_t +#undef gpuSuccess +#undef gpuErrorNotReady +#undef gpuGetDeviceCount +#undef gpuGetErrorString +#undef gpuGetDeviceProperties +#undef gpuStreamDefault +#undef gpuGetDevice +#undef gpuSetDevice +#undef gpuMalloc +#undef gpuFree +#undef gpuMemsetAsync +#undef gpuMemcpyAsync +#undef gpuMemcpyDeviceToDevice +#undef gpuMemcpyDeviceToHost +#undef gpuMemcpyHostToDevice +#undef gpuStreamQuery +#undef gpuSharedMemConfig +#undef gpuDeviceSetSharedMemConfig +#undef gpuStreamSynchronize +#undef gpuDeviceSynchronize +#undef gpuMemcpy + +#endif // EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES + +#undef EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H + +#endif // EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h new file mode 100644 index 0000000..a901c5d --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H +#define EIGEN_CXX11_TENSOR_TENSOR_IO_H + +namespace Eigen { + +namespace internal { + +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } + } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} + +template +std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + + // Evaluate the expression if needed + TensorForcedEvalOp eval = expr.eval(); + Evaluator tensor(eval, DefaultDevice()); + tensor.evalSubExprsIfNeeded(NULL); + + // Print the result + static const int rank = internal::array_size::value; + internal::TensorPrinter::run(os, tensor); + + // Cleanup. + tensor.cleanup(); + return os; +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h new file mode 100644 index 0000000..dd51850 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -0,0 +1,603 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H + +namespace Eigen { + +/** \class TensorImagePatch + * \ingroup CXX11_Tensor_Module + * + * \brief Patch extraction specialized for image processing. + * This assumes that the input has a least 3 dimensions ordered as follow: + * 1st dimension: channels (of size d) + * 2nd dimension: rows (of size r) + * 3rd dimension: columns (of size c) + * There can be additional dimensions such as time (for video) or batch (for + * bulk processing after the first 3. + * Calling the image patch code with patch_rows and patch_cols is equivalent + * to calling the regular patch extraction code with parameters d, patch_rows, + * patch_cols, and 1 for all the additional dimensions. 
+ */ +namespace internal { + +template +struct traits > : public traits +{ + typedef typename internal::remove_const::type Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorImagePatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorImagePatchOp type; +}; + +template +struct ImagePatchCopyOp { + typedef typename Self::Index Index; + typedef typename Self::Scalar Scalar; + typedef typename Self::Impl Impl; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const Self& self, const Index num_coeff_to_copy, const Index dst_index, + Scalar* dst_data, const Index src_index) { + const Impl& impl = self.impl(); + for (Index i = 0; i < num_coeff_to_copy; ++i) { + dst_data[dst_index + i] = impl.coeff(src_index + i); + } + } +}; + +template +struct ImagePatchCopyOp { + typedef typename Self::Index Index; + typedef typename Self::Scalar Scalar; + typedef typename Self::Impl Impl; + typedef typename packet_traits::type Packet; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const Self& self, const Index num_coeff_to_copy, const Index dst_index, + Scalar* dst_data, const Index src_index) { + const Impl& impl = self.impl(); + const Index packet_size = internal::unpacket_traits::size; + const Index vectorized_size = + (num_coeff_to_copy / packet_size) * packet_size; + for (Index i = 0; i < vectorized_size; i += packet_size) { + Packet p = impl.template packet(src_index + i); + internal::pstoret(dst_data + dst_index + i, p); + } + for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) { + dst_data[dst_index + i] = impl.coeff(src_index + i); + } + } +}; + +template +struct ImagePatchPaddingOp { + typedef typename Self::Index Index; + typedef typename Self::Scalar Scalar; + typedef typename packet_traits::type Packet; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const Index num_coeff_to_pad, const Scalar padding_value, + const Index dst_index, Scalar* dst_data) { + const Index packet_size = internal::unpacket_traits::size; + const Packet padded_packet = internal::pset1(padding_value); + const Index vectorized_size = + (num_coeff_to_pad / packet_size) * packet_size; + for (Index i = 0; i < vectorized_size; i += packet_size) { + internal::pstoret(dst_data + dst_index + i, + padded_packet); + } + for (Index i = vectorized_size; i < num_coeff_to_pad; ++i) { + dst_data[dst_index + i] = padding_value; + } + } +}; + +} // end namespace internal + +template +class TensorImagePatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex 
row_inflate_strides, DenseIndex col_inflate_strides, + PaddingType padding_type, Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), + m_padding_type(padding_type), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + DenseIndex padding_top, DenseIndex padding_bottom, + DenseIndex padding_left, DenseIndex padding_right, + Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), + m_padding_left(padding_left), m_padding_right(padding_right), + m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} + + + EIGEN_DEVICE_FUNC + DenseIndex patch_rows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + DenseIndex patch_cols() const { return m_patch_cols; } + EIGEN_DEVICE_FUNC + DenseIndex row_strides() const { return m_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_strides() const { return m_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_row_strides() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_col_strides() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } + EIGEN_DEVICE_FUNC + bool padding_explicit() const { return m_padding_explicit; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top() const { return m_padding_top; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom() const { return m_padding_bottom; } + EIGEN_DEVICE_FUNC + DenseIndex padding_left() const { return m_padding_left; } + EIGEN_DEVICE_FUNC + DenseIndex padding_right() const { return m_padding_right; } + EIGEN_DEVICE_FUNC + PaddingType padding_type() const { return m_padding_type; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const DenseIndex m_patch_rows; + const DenseIndex m_patch_cols; + const DenseIndex m_row_strides; + const DenseIndex m_col_strides; + const DenseIndex m_in_row_strides; + const DenseIndex m_in_col_strides; + const DenseIndex m_row_inflate_strides; + const DenseIndex m_col_inflate_strides; + const bool m_padding_explicit; + const DenseIndex m_padding_top; + const DenseIndex m_padding_bottom; + const DenseIndex m_padding_left; + const DenseIndex m_padding_right; + const PaddingType m_padding_type; + const Scalar m_padding_value; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorImagePatchOp XprType; + 
typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef TensorEvaluator, + Device> Self; + typedef TensorEvaluator Impl; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device) + : m_device(device), m_impl(op.expression(), device) + { + EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + + m_paddingValue = op.padding_value(); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // Caches a few variables. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputDepth = input_dims[0]; + m_inputRows = input_dims[1]; + m_inputCols = input_dims[2]; + } else { + m_inputDepth = input_dims[NumInputDims-1]; + m_inputRows = input_dims[NumInputDims-2]; + m_inputCols = input_dims[NumInputDims-3]; + } + + m_row_strides = op.row_strides(); + m_col_strides = op.col_strides(); + + // Input strides and effective input/patch size + m_in_row_strides = op.in_row_strides(); + m_in_col_strides = op.in_col_strides(); + m_row_inflate_strides = op.row_inflate_strides(); + m_col_inflate_strides = op.col_inflate_strides(); + // The "effective" input rows and input cols are the input rows and cols + // after inflating them with zeros. + // For examples, a 2x3 matrix with row_inflate_strides and + // col_inflate_strides of 2 comes from: + // A B C + // D E F + // + // to a matrix is 3 x 5: + // + // A . B . C + // . . . . . + // D . E . 
F + + m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; + m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; + m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); + m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); + + if (op.padding_explicit()) { + m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_rowPaddingTop = op.padding_top(); + m_colPaddingLeft = op.padding_left(); + } else { + // Computing padding from the type + switch (op.padding_type()) { + case PADDING_VALID: + m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + // Calculate the padding + m_rowPaddingTop = numext::maxi(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2); + m_colPaddingLeft = numext::maxi(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2); + break; + case PADDING_SAME: + m_outputRows = numext::ceil(m_input_rows_eff / static_cast(m_row_strides)); + m_outputCols = numext::ceil(m_input_cols_eff / static_cast(m_col_strides)); + // Calculate the padding + m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; + m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; + // The padding size calculation for PADDING_SAME has been updated to + // be consistent with how TensorFlow extracts its paddings. + m_rowPaddingTop = numext::maxi(0, m_rowPaddingTop); + m_colPaddingLeft = numext::maxi(0, m_colPaddingLeft); + break; + default: + eigen_assert(false && "unexpected padding"); + m_outputCols=0; // silence the uninitialised warning; + m_outputRows=0; //// silence the uninitialised warning; + } + } + eigen_assert(m_outputRows > 0); + eigen_assert(m_outputCols > 0); + + // Dimensions for result of extraction. + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_rows + // 2: patch_cols + // 3: number of patches + // 4 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_rows(); + m_dimensions[2] = op.patch_cols(); + m_dimensions[3] = m_outputRows * m_outputCols; + for (int i = 4; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_rows + // NumDims-3: patch_cols + // NumDims-4: number of patches + // NumDims-5 and beyond: anything else (such as batch). + m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_rows(); + m_dimensions[NumDims-3] = op.patch_cols(); + m_dimensions[NumDims-4] = m_outputRows * m_outputCols; + for (int i = NumDims-5; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } + } + + // Strides for moving the patch in various dimensions. 
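[Editorial note, not part of the patch] To make the PADDING_VALID / PADDING_SAME output-size and padding arithmetic above concrete before the stride setup below, here is a minimal standalone sketch. All sizes are invented for illustration; VALID only keeps patch positions that fit entirely, while SAME covers every input row and pads the remainder with the padding value.

#include <algorithm>
#include <cassert>
#include <cmath>

int main() {
  // Hypothetical sizes: 10 effective input rows, 4 effective patch rows, row stride 3.
  const float in = 10, patch = 4;
  const int stride = 3;

  // PADDING_VALID: ceil((in - patch + 1) / stride) positions, padding usually ~0.
  const int valid_rows = int(std::ceil((in - patch + 1.f) / stride));            // 3
  // PADDING_SAME: ceil(in / stride) positions; pad so the patches cover the input.
  const int same_rows = int(std::ceil(in / stride));                              // 4
  const int pad_top = std::max(0, ((same_rows - 1) * stride + int(patch) - int(in)) / 2);  // 1

  assert(valid_rows == 3 && same_rows == 4 && pad_top == 1);
}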
+ if (static_cast(Layout) == static_cast(ColMajor)) { + m_colStride = m_dimensions[1]; + m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[3]; + } else { + m_colStride = m_dimensions[NumDims-2]; + m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-4]; + } + + // Strides for navigating through the input tensor. + m_rowInputStride = m_inputDepth; + m_colInputStride = m_inputDepth * m_inputRows; + m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; + + // Fast representations of different variables. + m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); + m_fastPatchStride = internal::TensorIntDivisor(m_patchStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + m_fastInflateRowStride = internal::TensorIntDivisor(m_row_inflate_strides); + m_fastInflateColStride = internal::TensorIntDivisor(m_col_inflate_strides); + m_fastInputColsEff = internal::TensorIntDivisor(m_input_cols_eff); + + // Number of patches in the width dimension. + m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Patch index corresponding to the passed in index. + const Index patchIndex = index / m_fastPatchStride; + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; + + // Other ways to index this element. + const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; + + // Calculate col index in the input original tensor. + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate row index in the original input tensor. + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? 
(inputRow / m_fastInflateRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + + const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { + return packetWithPossibleZero(index); + } + + const Index indices[2] = {index, index + PacketSize - 1}; + const Index patchIndex = indices[0] / m_fastPatchStride; + if (patchIndex != indices[1] / m_fastPatchStride) { + return packetWithPossibleZero(index); + } + const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride; + eigen_assert(otherIndex == indices[1] / m_fastOtherStride); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, + (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; + + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; + eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); + + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; + + // Calculate col indices in the original input tensor. + const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - + m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; + if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + // Calculate col indices in the original input tensor. + const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - + m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; + + if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { + // no padding + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 
0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.template packet(inputIndex); + } + } + + return packetWithPossibleZero(index); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator& impl() const { return m_impl; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowPaddingTop() const { return m_rowPaddingTop; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colPaddingLeft() const { return m_colPaddingLeft; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputRows() const { return m_outputRows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputCols() const { return m_outputCols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userRowStride() const { return m_row_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userColStride() const { return m_col_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInRowStride() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInColStride() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowInflateStride() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colInflateStride() const { return m_col_inflate_strides; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We conservatively estimate the cost for the code path where the computed + // index is inside the original image and + // TensorEvaluator::CoordAccess is false. 
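[Editorial note, not part of the patch] The coeff() path above decomposes a linear output index into channel, row/column offsets inside the patch, and the patch number, then maps those back to an input coordinate. The standalone sketch below mirrors that ColMajor decomposition with plain integer division (the evaluator uses TensorIntDivisor for the same arithmetic); all sizes and names are invented.

#include <cassert>

int main() {
  // Hypothetical ColMajor patch layout: [depth, patch_rows, patch_cols, patches].
  const long depth = 2, patch_rows = 3, patch_cols = 3;
  const long col_stride = patch_rows;                          // plays the role of m_colStride
  const long patch_stride = col_stride * patch_cols * depth;   // plays the role of m_patchStride

  const long index = 41;                                       // some coefficient of the result
  const long patch = index / patch_stride;                     // which patch
  const long patch_offset = (index - patch * patch_stride) / depth;
  const long col = patch_offset / col_stride;                  // column inside the patch
  const long row = patch_offset - col * col_stride;            // row inside the patch
  const long d = index - (index / depth) * depth;              // channel

  // The forward (ColMajor) mapping reproduces the same linear index.
  assert(d + depth * (row + patch_rows * (col + patch_cols * patch)) == index);
  assert(patch == 2 && row == 2 && col == 0 && d == 1);
}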
+ const double compute_cost = 3 * TensorOpCost::DivCost() + + 6 * TensorOpCost::MulCost() + + 8 * TensorOpCost::MulCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + + Index m_otherStride; + Index m_patchStride; + Index m_colStride; + Index m_row_strides; + Index m_col_strides; + + Index m_in_row_strides; + Index m_in_col_strides; + Index m_row_inflate_strides; + Index m_col_inflate_strides; + + Index m_input_rows_eff; + Index m_input_cols_eff; + Index m_patch_rows_eff; + Index m_patch_cols_eff; + + internal::TensorIntDivisor m_fastOtherStride; + internal::TensorIntDivisor m_fastPatchStride; + internal::TensorIntDivisor m_fastColStride; + internal::TensorIntDivisor m_fastInflateRowStride; + internal::TensorIntDivisor m_fastInflateColStride; + internal::TensorIntDivisor m_fastInputColsEff; + + Index m_rowInputStride; + Index m_colInputStride; + Index m_patchInputStride; + + Index m_inputDepth; + Index m_inputRows; + Index m_inputCols; + + Index m_outputRows; + Index m_outputCols; + + Index m_rowPaddingTop; + Index m_colPaddingLeft; + + internal::TensorIntDivisor m_fastOutputRows; + internal::TensorIntDivisor m_fastOutputDepth; + + Scalar m_paddingValue; + + const Device EIGEN_DEVICE_REF m_device; + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h new file mode 100644 index 0000000..2d8c7b9 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -0,0 +1,738 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H + + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES + +#define EIGEN_HAS_INDEX_LIST + +namespace Eigen { + +/** \internal + * + * \class TensorIndexList + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode a set of Tensor dimensions/indices. + * + * The indices in the list can be known at compile time or at runtime. A mix + * of static and dynamic indices can also be provided if needed. The tensor + * code will attempt to take advantage of the indices that are known at + * compile time to optimize the code it generates. + * + * This functionality requires a c++11 compliant compiler. If your compiler + * is older you need to use arrays of indices instead. + * + * Several examples are provided in the cxx11_tensor_index_list.cpp file. 
+ * + * \sa Tensor + */ + +template +struct type2index { + static const Index value = n; + EIGEN_DEVICE_FUNC constexpr operator Index() const { return n; } + EIGEN_DEVICE_FUNC void set(Index val) { + eigen_assert(val == n); + } +}; + +// This can be used with IndexPairList to get compile-time constant pairs, +// such as IndexPairList, type2indexpair<3,4>>(). +template +struct type2indexpair { + static const Index first = f; + static const Index second = s; + + constexpr EIGEN_DEVICE_FUNC operator IndexPair() const { + return IndexPair(f, s); + } + + EIGEN_DEVICE_FUNC void set(const IndexPair& val) { + eigen_assert(val.first == f); + eigen_assert(val.second == s); + } +}; + + +template struct NumTraits > +{ + typedef Index Real; + enum { + IsComplex = 0, + RequireInitialization = false, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real epsilon() { return 0; } + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real dummy_precision() { return 0; } + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real highest() { return n; } + EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real lowest() { return n; } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC void update_value(T& val, Index new_val) { + val = internal::convert_index(new_val); +} +template +EIGEN_DEVICE_FUNC void update_value(type2index& val, Index new_val) { + val.set(new_val); +} + +template +EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2indexpair& val, IndexPair new_val) { + val.set(new_val); +} + + +template +struct is_compile_time_constant { + static constexpr bool value = false; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; + + +template +struct IndexTuple; + +template +struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) 
{ } + + constexpr static int count = 1 + sizeof...(O); + T head; + IndexTuple others; + typedef T Head; + typedef IndexTuple Other; +}; + +template + struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { } + + constexpr static int count = 1; + T head; + typedef T Head; +}; + + +template +struct IndexTupleExtractor; + +template +struct IndexTupleExtractor { + + typedef typename IndexTupleExtractor::ValType ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + IndexTupleExtractor::set_val(val.others, new_val); + } + +}; + +template + struct IndexTupleExtractor<0, T, O...> { + + typedef T ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return val.head; + } + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return val.head; + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + val.head = new_val; + } +}; + + + +template +EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor::ValType& array_get(IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template +EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor::ValType& array_get(const IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; + + + + +template +struct tuple_coeff { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const Index i, const IndexTuple& t) { + // return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + return (i == Idx ? 
array_get(t) : tuple_coeff::get(i, t)); + } + template + EIGEN_DEVICE_FUNC static void set(const Index i, IndexTuple& t, const ValueT& value) { + if (i == Idx) { + update_value(array_get(t), value); + } else { + tuple_coeff::set(i, t, value); + } + } + + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const Index i, const IndexTuple& t) { + return ((i == Idx) & is_compile_time_constant::ValType>::value) || + tuple_coeff::value_known_statically(i, t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + tuple_coeff::values_up_to_known_statically(t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + is_compile_time_constant::ValType>::value && + array_get(t) > array_get(t) && + tuple_coeff::values_up_to_statically_known_to_increase(t); + } +}; + +template +struct tuple_coeff<0, ValueT> { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const Index /*i*/, const IndexTuple& t) { + // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr + return array_get<0>(t)/* * (i == 0)*/; + } + template + EIGEN_DEVICE_FUNC static void set(const Index i, IndexTuple& t, const ValueT value) { + eigen_assert (i == 0); + update_value(array_get<0>(t), value); + } + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const Index i, const IndexTuple&) { + return is_compile_time_constant::ValType>::value && (i == 0); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple&) { + return is_compile_time_constant::ValType>::value; + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple&) { + return true; + } +}; +} // namespace internal + + + +template +struct IndexList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Index operator[] (const Index i) const { + return internal::tuple_coeff >::value-1, Index>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Index get(const Index i) const { + return internal::tuple_coeff >::value-1, Index>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const Index i, const Index value) { + return internal::tuple_coeff >::value-1, Index>::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple(first, other...) 
{ } + EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const Index i) const { + return internal::tuple_coeff >::value-1, Index>::value_known_statically(i, *this); + } + EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { + return internal::tuple_coeff >::value-1, Index>::values_up_to_known_statically(*this); + } + + EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { + return internal::tuple_coeff >::value-1, Index>::values_up_to_statically_known_to_increase(*this); + } +}; + +template +std::ostream& operator<<(std::ostream& os, + const IndexList& dims) { + os << "["; + for (size_t i = 0; i < 1 + sizeof...(OtherTypes); ++i) { + if (i > 0) os << ", "; + os << dims[i]; + } + os << "]"; + return os; +} + +template +constexpr IndexList make_index_list(FirstType val1, OtherTypes... other_vals) { + return IndexList(val1, other_vals...); +} + + +template +struct IndexPairList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair operator[] (const Index i) const { + return internal::tuple_coeff >::value-1, IndexPair>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const Index i, const IndexPair value) { + return internal::tuple_coeff>::value-1, IndexPair >::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const Index i) const { + return internal::tuple_coeff >::value-1, Index>::value_known_statically(i, *this); + } +}; + +namespace internal { + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index array_prod(const IndexList& sizes) { + Index result = 1; + EIGEN_UNROLL_LOOP + for (size_t i = 0; i < array_size >::value; ++i) { + result *= sizes[i]; + } + return result; +} + +template struct array_size > { + static const size_t value = array_size >::value; +}; +template struct array_size > { + static const size_t value = array_size >::value; +}; + +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; + +template EIGEN_DEVICE_FUNC constexpr Index array_get(IndexList& a) { + return IndexTupleExtractor::get_val(a); +} +template EIGEN_DEVICE_FUNC constexpr Index array_get(const IndexList& a) { + return IndexTupleExtractor::get_val(a); +} + +template +struct index_known_statically_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index) { + return false; + } +}; + +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i) { + return IndexList().value_known_statically(i); + } +}; + +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i) { + return IndexList().value_known_statically(i); + } +}; + + +template +struct all_indices_known_statically_impl { + static constexpr bool run() { + return false; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + + +template +struct 
indices_statically_known_to_increase_impl { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return false; + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + + +template +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + + +template +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + + +template +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + + + +template +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + + + +template +struct index_pair_first_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + + + +template +struct index_pair_second_statically_eq_impl { + EIGEN_DEVICE_FUNC static 
constexpr bool run(Index, Index) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const Index i, const Index value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + + +} // end namespace internal +} // end namespace Eigen + +#else + +namespace Eigen { +namespace internal { + +template +struct index_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const Index) { + return false; + } +}; + +template +struct all_indices_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct indices_statically_known_to_increase_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct index_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_ne_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_gt_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + +template +struct index_statically_lt_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(Index, Index) { + return false; + } +}; + + + +} // end namespace internal +} // end namespace Eigen + +#endif + + +namespace Eigen { +namespace internal { +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(Index i) { + return index_known_statically_impl::run(i); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(Index i, Index value) { + return index_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(Index i, Index value) { + return index_statically_ne_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(Index i, Index value) { + return index_statically_gt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(Index i, Index value) { + return index_statically_lt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(Index i, Index value) { + return index_pair_first_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(Index i, Index value) { + return index_pair_second_statically_eq_impl::run(i, value); +} + +} // end 
namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h new file mode 100644 index 0000000..c5cb61a --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -0,0 +1,247 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Ke Yang +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H + +namespace Eigen { + +/** \class TensorInflation + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor inflation class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorInflationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorInflationOp type; +}; + +} // end namespace internal + +template +class TensorInflationOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) + : m_xpr(expr), m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const Strides& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorInflationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + 
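[Editorial note, not part of the patch] As a quick standalone sketch of what inflation means before the evaluator constructor below: each dimension of size d inflated by stride s becomes (d - 1) * s + 1, original values land at multiples of the stride, and the holes read back as zero (the evaluator returns Scalar(0) for indices that fall into a hole). Names and sizes here are invented.

#include <cassert>
#include <vector>

int main() {
  // Inflate a 1-D "tensor" of size 3 with stride 2: output size (3 - 1) * 2 + 1 = 5.
  const std::vector<int> in = {1, 2, 3};
  const int stride = 2;
  std::vector<int> out((in.size() - 1) * stride + 1, 0);  // holes default to zero
  for (size_t i = 0; i < in.size(); ++i) out[i * stride] = in[i];

  const std::vector<int> expected = {1, 0, 2, 0, 3};
  assert(out == expected);
}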
EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_strides(op.strides()) + { + m_dimensions = m_impl.dimensions(); + // Expand each dimension to the inflated dimension. + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; + } + + // Remember the strides for fast division. + for (int i = 0; i < NumDims; ++i) { + m_fastStrides[i] = internal::TensorIntDivisor(m_strides[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + } else { // RowMajor + m_outputStrides[NumDims-1] = 1; + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + // Computes the input index given the output index. Returns true if the output + // index doesn't fall into a hole. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const + { + eigen_assert(index < dimensions().TotalSize()); + *inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[0] * m_strides[0]) { + return false; + } + *inputIndex += index / m_strides[0]; + return true; + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { + return false; + } + *inputIndex += index / m_strides[NumDims - 1]; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index inputIndex = 0; + if (getInputIndex(index, &inputIndex)) { + return m_impl.coeff(inputIndex); + } else { + return Scalar(0); + } + } + + // TODO(yangke): optimize this function so that we can detect and produce + // all-zero packets + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double 
compute_cost = NumDims * (3 * TensorOpCost::DivCost() + + 3 * TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + const double input_size = m_impl.dimensions().TotalSize(); + const double output_size = m_dimensions.TotalSize(); + if (output_size == 0) + return TensorOpCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0, + compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Strides m_strides; + array, NumDims> m_fastStrides; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h new file mode 100644 index 0000000..26a3818 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H +#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H + +#if EIGEN_HAS_VARIADIC_TEMPLATES + +#include + +namespace Eigen { + +/** \class TensorInitializer + * \ingroup CXX11_Tensor_Module + * + * \brief Helper template to initialize Tensors from std::initializer_lists. + */ +namespace internal { + +template +struct Initializer { + typedef std::initializer_list< + typename Initializer::InitList> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + for (const auto& v : vals) { + (*indices)[traits::NumDimensions - N] = i++; + Initializer::run(tensor, indices, v); + } + } +}; + +template +struct Initializer { + typedef std::initializer_list::Scalar> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + // There is likely a faster way to do that than iterating. 
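[Editorial note, not part of the patch] The recursive Initializer around this loop peels one nesting level of std::initializer_list per tensor dimension and writes coefficients in order. The standalone sketch below mirrors that recursion for a 2x3 case by flattening into a vector; it is an illustration only, with invented names, and is not the Initializer API itself.

#include <cassert>
#include <initializer_list>
#include <vector>

// Peel one nesting level per "dimension", like the recursive Initializer<T, N>.
void flatten(double v, std::vector<double>& out) { out.push_back(v); }

template <typename List>
void flatten(std::initializer_list<List> vals, std::vector<double>& out) {
  for (const auto& v : vals) flatten(v, out);
}

int main() {
  using Row = std::initializer_list<double>;
  std::vector<double> flat;
  flatten(std::initializer_list<Row>{{1, 2, 3}, {4, 5, 6}}, flat);  // a 2x3 "tensor"
  assert((flat == std::vector<double>{1, 2, 3, 4, 5, 6}));
}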
+ for (const auto& v : vals) { + (*indices)[traits::NumDimensions - 1] = i++; + tensor.coeffRef(*indices) = v; + } + } +}; + +template +struct Initializer { + typedef typename traits::Scalar InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>*, + const InitList& v) { + tensor.coeffRef(0) = v; + } +}; + + +template +void initialize_tensor(TensorEvaluator& tensor, + const typename Initializer::NumDimensions>::InitList& vals) { + Eigen::array::Index, traits::NumDimensions> indices; + Initializer::NumDimensions>::run(tensor, &indices, vals); +} + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h new file mode 100644 index 0000000..6d5cce4 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -0,0 +1,263 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H +#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H + + +namespace Eigen { + +/** \internal + * + * \class TensorIntDiv + * \ingroup CXX11_Tensor_Module + * + * \brief Fast integer division by a constant. + * + * See the paper from Granlund and Montgomery for explanation. + * (at https://doi.org/10.1145/773473.178249) + * + * \sa Tensor + */ + +namespace internal { + +namespace { + + // Note: result is undefined if val == 0 + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef EIGEN_GPU_COMPILE_PHASE + return __clz(val); +#elif defined(SYCL_DEVICE_ONLY) + return cl::sycl::clz(val); +#elif EIGEN_COMP_MSVC + unsigned long index; + _BitScanReverse(&index, val); + return 31 - index; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clz(static_cast(val)); +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef EIGEN_GPU_COMPILE_PHASE + return __clzll(val); +#elif defined(SYCL_DEVICE_ONLY) + return static_cast(cl::sycl::clz(val)); +#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 + unsigned long index; + _BitScanReverse64(&index, val); + return 63 - index; +#elif EIGEN_COMP_MSVC + // MSVC's _BitScanReverse64 is not available for 32bits builds. 
+ unsigned int lo = (unsigned int)(val&0xffffffff); + unsigned int hi = (unsigned int)((val>>32)&0xffffffff); + int n; + if(hi==0) + n = 32 + count_leading_zeros(lo); + else + n = count_leading_zeros(hi); + return n; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clzll(static_cast(val)); +#endif + } + + template + struct UnsignedTraits { + typedef typename conditional::type type; + }; + + template + struct DividerTraits { + typedef typename UnsignedTraits::type type; + static const int N = sizeof(T) * 8; + }; + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { +#if defined(EIGEN_GPU_COMPILE_PHASE) + return __umulhi(a, b); +#elif defined(SYCL_DEVICE_ONLY) + return cl::sycl::mul_hi(a, static_cast(b)); +#else + return (static_cast(a) * b) >> 32; +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { +#if defined(EIGEN_GPU_COMPILE_PHASE) + return __umul64hi(a, b); +#elif defined(SYCL_DEVICE_ONLY) + return cl::sycl::mul_hi(a, static_cast(b)); +#elif EIGEN_HAS_BUILTIN_INT128 + __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); + return static_cast(v >> 64); +#else + return (TensorUInt128, uint64_t>(a) * TensorUInt128, uint64_t>(b)).upper(); +#endif + } + + template + struct DividerHelper { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { + EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); + return static_cast((static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1); + } + }; + + template + struct DividerHelper<64, T> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { +#if EIGEN_HAS_BUILTIN_INT128 && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY) + return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); +#else + const uint64_t shift = 1ULL << log_div; + TensorUInt128 result = TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) + - TensorUInt128, static_val<0> >(1, 0) + + TensorUInt128, static_val<1> >(1); + return static_cast(result); +#endif + } + }; +} + + +template +struct TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + multiplier = 0; + shift1 = 0; + shift2 = 0; + } + + // Must have 0 < divider < 2^31. This is relaxed to + // 0 < divider < 2^63 when using 64-bit indices on platforms that support + // the __uint128_t type. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { + const int N = DividerTraits::N; + eigen_assert(static_cast::type>(divider) < NumTraits::highest()/2); + eigen_assert(divider > 0); + + // fast ln2 + const int leading_zeros = count_leading_zeros(static_cast(divider)); + int log_div = N - leading_zeros; + // if divider is a power of two then log_div is 1 more than it should be. + if ((static_cast::type>(1) << (log_div-1)) == static_cast::type>(divider)) + log_div--; + + multiplier = DividerHelper::computeMultiplier(log_div, divider); + shift1 = log_div > 1 ? 1 : log_div; + shift2 = log_div > 1 ? log_div-1 : 0; + } + + // Must have 0 <= numerator. On platforms that don't support the __uint128_t + // type numerator should also be less than 2^32-1. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { + eigen_assert(static_cast::type>(numerator) < NumTraits::highest()/2); + //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above + + UnsignedType t1 = muluh(multiplier, numerator); + UnsignedType t = (static_cast(numerator) - t1) >> shift1; + return (t1 + t) >> shift2; + } + + private: + typedef typename DividerTraits::type UnsignedType; + UnsignedType multiplier; + int32_t shift1; + int32_t shift2; +}; + + +// Optimized version for signed 32 bit integers. +// Derived from Hacker's Delight. +// Only works for divisors strictly greater than one +template <> +class TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + magic = 0; + shift = 0; + } + // Must have 2 <= divider + EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) { + eigen_assert(divider >= 2); + calcMagic(divider); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { +#ifdef EIGEN_GPU_COMPILE_PHASE + return (__umulhi(magic, n) >> shift); +#elif defined(SYCL_DEVICE_ONLY) + return (cl::sycl::mul_hi(magic, static_cast(n)) >> shift); +#else + uint64_t v = static_cast(magic) * static_cast(n); + return (static_cast(v >> 32) >> shift); +#endif + } + +private: + // Compute the magic numbers. See Hacker's Delight section 10 for an in + // depth explanation. + EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { + const unsigned two31 = 0x80000000; // 2**31. + unsigned ad = d; + unsigned t = two31 + (ad >> 31); + unsigned anc = t - 1 - t%ad; // Absolute value of nc. + int p = 31; // Init. p. + unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. + unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). + unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. + unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). + unsigned delta = 0; + do { + p = p + 1; + q1 = 2*q1; // Update q1 = 2**p/|nc|. + r1 = 2*r1; // Update r1 = rem(2**p, |nc|). + if (r1 >= anc) { // (Must be an unsigned + q1 = q1 + 1; // comparison here). + r1 = r1 - anc;} + q2 = 2*q2; // Update q2 = 2**p/|d|. + r2 = 2*r2; // Update r2 = rem(2**p, |d|). + if (r2 >= ad) { // (Must be an unsigned + q2 = q2 + 1; // comparison here). + r2 = r2 - ad;} + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + magic = (unsigned)(q2 + 1); + shift = p - 32; + } + + uint32_t magic; + int32_t shift; +}; + + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { + return divisor.divide(numerator); +} + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h new file mode 100644 index 0000000..80106c1 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -0,0 +1,216 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
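[Editorial note, not part of the patch] The TensorIntDivisor above replaces division by a runtime constant with a precomputed multiply-high and shift; the exact magic/shift pair follows Granlund-Montgomery and Hacker's Delight. The standalone sketch below demonstrates the underlying identity with a simpler round-up multiplier (m = ceil(2^(32+l) / d) with l = ceil(log2 d)), not the library's own calcMagic; divisor, step, and names are invented.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t d = 7;                                    // constant divisor
  const uint64_t m = ((uint64_t(1) << 35) + d - 1) / d;    // ceil(2^35 / 7) = 4908534053
  // For numerators below 2^31 the 64-bit product cannot overflow, matching the
  // "numerator < highest/2" precondition asserted by divide() above.
  for (uint32_t n = 0; n < (1u << 31); n += 12345) {
    const uint32_t q = uint32_t((uint64_t(n) * m) >> 35);  // multiply-high + shift
    assert(q == n / d);
  }
  std::printf("multiplier %llu reproduces n/7 exactly\n", (unsigned long long)m);
}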
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H + +namespace Eigen { + +/** \class TensorLayoutSwap + * \ingroup CXX11_Tensor_Module + * + * \brief Swap the layout from col-major to row-major, or row-major + * to col-major, and invert the order of the dimensions. + * + * Beware: the dimensions are reversed by this operation. If you want to + * preserve the ordering of the dimensions, you need to combine this + * operation with a shuffle. + * + * \example: + * Tensor input(2, 4); + * Tensor output = input.swap_layout(); + * eigen_assert(output.dimension(0) == 4); + * eigen_assert(output.dimension(1) == 2); + * + * array shuffle(1, 0); + * output = input.swap_layout().shuffle(shuffle); + * eigen_assert(output.dimension(0) == 2); + * eigen_assert(output.dimension(1) == 4); + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = (traits::Layout == ColMajor) ? RowMajor : ColMajor; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorLayoutSwapOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorLayoutSwapOp type; +}; + +} // end namespace internal + + + +template +class TensorLayoutSwapOp : public TensorBase, WriteAccessors> +{ + public: + typedef TensorBase, WriteAccessors> Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorLayoutSwapOp) + protected: + typename XprType::Nested m_xpr; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorLayoutSwapOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? 
RowMajor : ColMajor, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + for(int i = 0; i < NumDims; ++i) { + m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; + } + } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC typename Storage::Type data() const { + return constCast(m_impl.data()); + } + + const TensorEvaluator& impl() const { return m_impl; } + + protected: + TensorEvaluator m_impl; + Dimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorLayoutSwapOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? 
RowMajor : ColMajor, + CoordAccess = false // to be implemented + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h new file mode 100644 index 0000000..73ff3d2 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -0,0 +1,98 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H + + +/** use this macro in sfinae selection in templated functions + * + * template::value , int >::type = 0 + * > + * void foo(){} + * + * becomes => + * + * template::value ) + * > + * void foo(){} + */ + +// SFINAE requires variadic templates +#if !defined(EIGEN_GPUCC) +#if EIGEN_HAS_VARIADIC_TEMPLATES + // SFINAE doesn't work for gcc <= 4.7 + #ifdef EIGEN_COMP_GNUC + #if EIGEN_GNUC_AT_LEAST(4,8) + #define EIGEN_HAS_SFINAE + #endif + #else + #define EIGEN_HAS_SFINAE + #endif +#endif +#endif + +#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ + typename internal::enable_if< ( __condition__ ) , int >::type = 0 + +// Define a macro to use a reference on the host but a value on the device +#if defined(SYCL_DEVICE_ONLY) + #define EIGEN_DEVICE_REF +#else + #define EIGEN_DEVICE_REF & +#endif + +// Define a macro for catching SYCL exceptions if exceptions are enabled +#define EIGEN_SYCL_TRY_CATCH(X) \ + do { \ + EIGEN_TRY {X;} \ + EIGEN_CATCH(const cl::sycl::exception& e) { \ + EIGEN_THROW_X(std::runtime_error("SYCL exception at " + \ + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + "\n" + \ + e.what())); \ + } \ + } while (false) + +// Define a macro if local memory flags are unset or one of them is set +// Setting both flags is the same as unsetting them +#if (!defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)) || \ + (defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM)) + #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1 + #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1 +#elif defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM) + #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1 +#elif !defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM) + #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1 +#endif + +#if EIGEN_COMP_CLANG // workaround clang bug (see 
http://forum.kde.org/viewtopic.php?f=74&t=102653) + #define EIGEN_TENSOR_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const OtherDerived& other) { Base::operator=(other); return *this; } +#else + #define EIGEN_TENSOR_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) +#endif + +/** \internal + * \brief Macro to manually inherit assignment operators. + * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined. + * This also inherits template operator=(const OtherDerived&) assignments. + * With C++11 or later this also default-implements the copy-constructor + */ +#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ + EIGEN_TENSOR_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived) + +#endif diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h new file mode 100644 index 0000000..6834c97 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -0,0 +1,327 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H + +namespace Eigen { + +// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_) + +/** \class TensorMap + * \ingroup CXX11_Tensor_Module + * + * \brief A tensor expression mapping an existing array of data. + * + */ +/// `template class MakePointer_` is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler `T*` is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`. +/// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_` is `T*` . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is `T*`. +template class MakePointer_> class TensorMap : public TensorBase > +{ + public: + typedef TensorMap Self; + typedef TensorBase > Base; + #ifdef EIGEN_USE_SYCL + typedef typename Eigen::internal::remove_reference::type>::type Nested; + #else + typedef typename Eigen::internal::nested::type Nested; + #endif + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename PlainObjectType::Base::CoeffReturnType CoeffReturnType; + + typedef typename MakePointer_::Type PointerType; + typedef typename MakePointer_::ConstType PointerConstType; + + // WARN: PointerType still can be a pointer to const (const Scalar*), for + // example in TensorMap> expression. This type of + // expression should be illegal, but adding this restriction is not possible + // in practice (see https://bitbucket.org/eigen/eigen/pull-requests/488). 
+ typedef typename internal::conditional< + bool(internal::is_lvalue::value), + PointerType, // use simple pointer in lvalue expressions + PointerConstType // use const pointer in rvalue expressions + >::type StoragePointerType; + + // If TensorMap was constructed over rvalue expression (e.g. const Tensor), + // we should return a reference to const from operator() (and others), even + // if TensorMap itself is not const. + typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar&, + const Scalar& + >::type StorageRefType; + + static const int Options = Options_; + + static const Index NumIndices = PlainObjectType::NumIndices; + typedef typename PlainObjectType::Dimensions Dimensions; + + enum { + IsAligned = ((int(Options_)&Aligned)==Aligned), + Layout = PlainObjectType::Layout, + CoordAccess = true, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr) : m_data(dataPtr), m_dimensions() { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { + EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { + EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { + EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { + EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, const array& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, const Dimensions& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) + : m_data(tensor.data()), m_dimensions(tensor.dimensions()) + { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StoragePointerType data() { return m_data; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StoragePointerType data() const { return m_data; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(const array& indices) const + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) const + { + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(internal::all((Eigen::NumTraits::highest() >= otherIndices)...)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + return m_data[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(const array& indices) + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) + { + static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + eigen_assert(internal::all((Eigen::NumTraits::highest() >= otherIndices)...)); + const std::size_t NumDims = sizeof...(otherIndices) + 2; + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE StorageRefType operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + return m_data[index]; + } + } +#endif + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorMap) + + private: + StoragePointerType m_data; + Dimensions m_dimensions; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h new file mode 100644 index 0000000..a6181d3 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -0,0 +1,311 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
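A minimal usage sketch for the TensorMap class defined above; it assumes the unsupported Tensor module is on the include path, and the buffer size and dimensions are illustrative:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  float data[12];
  for (int i = 0; i < 12; ++i) data[i] = static_cast<float>(i);

  // View the existing buffer as a 3x4 (column-major) tensor; no copy is made.
  Eigen::TensorMap<Eigen::Tensor<float, 2>> view(data, 3, 4);
  std::cout << view(2, 3) << "\n";   // last element of the buffer, 11

  // Writes through the map go straight into the underlying buffer.
  view(0, 0) = 42.0f;
  std::cout << data[0] << "\n";      // 42
  return 0;
}

Because a TensorMap stores only the pointer and the dimensions, the caller stays responsible for keeping the mapped buffer alive for the lifetime of the map.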
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_H + +namespace Eigen { + +template struct Cond {}; + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T1& choose(Cond, const T1& first, const T2&) { + return first; +} + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T2& choose(Cond, const T1&, const T2& second) { + return second; +} + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const X x, const Y y) { + return static_cast((x + y - 1) / y); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const T x, const T y) { + return static_cast((x + y - 1) / y); +} + +template struct max_n_1 { + static const size_t size = n; +}; +template <> struct max_n_1<0> { + static const size_t size = 1; +}; + + +// Default packet types +template +struct PacketType : internal::packet_traits { + typedef typename internal::packet_traits::type type; +}; + +// For CUDA packet types when using a GpuDevice +#if defined(EIGEN_USE_GPU) && defined(EIGEN_HAS_GPU_FP16) + +typedef ulonglong2 Packet4h2; +template<> +struct PacketType { + typedef Packet4h2 type; + static const int size = 8; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasExpm1 = 0, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; +}; +#endif + +#if defined(EIGEN_USE_SYCL) + +namespace TensorSycl { +namespace internal { + +template struct PlusOp { + static constexpr Index Value = A + B; +}; + +template struct DivOp { + static constexpr Index Value = A / B; +}; + +template class StepOp> +struct static_for { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator op) { + op(start); + static_for::Value, end, step, + StepOp>::loop(op); + } +}; +template class StepOp> +struct static_for { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator) {} +}; + +template +struct Vectorise { + static const int PacketSize = 1; + typedef OutScalar PacketReturnType; +}; + +template +struct Vectorise { + static const int PacketSize = Eigen::PacketType::size; + typedef typename Eigen::PacketType::type PacketReturnType; +}; + +static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index roundUp(Index x, Index y) { + return ((((x) + (y)-1) / (y)) * (y)); +} + +} // namespace internal +} // namespace TensorSycl + +template <> + struct PacketType { + typedef half type; + static const int size = 1; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasArg = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0 + }; +}; +template +struct PacketType : internal::default_packet_traits { + typedef Scalar type; + typedef Scalar half; + enum { + Vectorizable = 0, + size = 1, + AlignedOnScalar = 0, + HasHalfPacket = 0 + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0 + }; + +}; + +template +struct PacketType : PacketType{}; + +#ifndef EIGEN_DONT_VECTORIZE_SYCL +#define PACKET_TYPE(CVQual, Type, val, lengths, DEV)\ +template<> struct PacketType : internal::sycl_packet_traits \ +{\ + typedef typename internal::packet_traits::type type;\ + typedef typename internal::packet_traits::half half;\ +}; + + +PACKET_TYPE(const, float, 1, 4, SyclDevice) +PACKET_TYPE(, float, 1, 4, 
SyclDevice) +PACKET_TYPE(const, float, 1, 4, const SyclDevice) +PACKET_TYPE(, float, 1, 4, const SyclDevice) + +PACKET_TYPE(const, double, 0, 2, SyclDevice) +PACKET_TYPE(, double, 0, 2, SyclDevice) +PACKET_TYPE(const, double, 0, 2, const SyclDevice) +PACKET_TYPE(, double, 0, 2, const SyclDevice) +#undef PACKET_TYPE + +template<> struct PacketType: PacketType{}; +template<> struct PacketType: PacketType{}; +#endif +#endif + +// Tuple mimics std::pair but works on e.g. nvcc. +template struct Tuple { + public: + U first; + V second; + + typedef U first_type; + typedef V second_type; + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple() : first(), second() {} + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple(const U& f, const V& s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(Tuple& rhs) { + using numext::swap; + swap(first, rhs.first); + swap(second, rhs.second); + } +}; + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator==(const Tuple& x, const Tuple& y) { + return (x.first == y.first && x.second == y.second); +} + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator!=(const Tuple& x, const Tuple& y) { + return !(x == y); +} + + +// Can't use std::pairs on cuda devices +template struct IndexPair { + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC void set(IndexPair val) { + first = val.first; + second = val.second; + } + + Idx first; + Idx second; +}; + + +#ifdef EIGEN_HAS_SFINAE +namespace internal { + + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx, numeric_list) { + return { idx[Is]... }; + } + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType&, numeric_list) { + return array(); + } + + /** Make an array (for index/dimensions) out of a custom index */ + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx) { + return customIndices2Array(idx, typename gen_numeric_list::type{}); + } + + + template + struct is_base_of + { + + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct Host + { + operator BB*() const; + operator DD*(); + }; + + template + static yes check(D*, T); + static no check(B*, int); + + static const bool value = sizeof(check(Host(), int())) == sizeof(yes); + }; + +} +#endif + + + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h new file mode 100644 index 0000000..b3f00f7 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -0,0 +1,1102 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H +#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H + +namespace Eigen { + +/** \class TensorReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorReshapingOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorReshapingOp type; +}; + +} // end namespace internal + + + +template +class TensorReshapingOp : public TensorBase, WriteAccessors> +{ + public: + typedef TensorBase, WriteAccessors> Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) + : m_xpr(expr), m_dims(dims) {} + + EIGEN_DEVICE_FUNC + const NewDimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp) + + protected: + typename XprType::Nested m_xpr; + const NewDimensions m_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + typedef StorageMemory::type, Device> ConstCastStorage; + + static const int NumOutputDims = internal::array_size::value; + static const int NumInputDims = internal::array_size::Dimensions>::value; + + enum ReshapingKind { + // We do not use layout information to determine reshaping kind. + // Depending on the layout `N` can be inner or outer dimension. + OneByN = 0, // expr.reshape(1, N) + NByOne = 1, // expr.reshape(N, 1) + Runtime = 2 // Reshape dimensions are dynamic (specified at runtime). + }; + + // clang-format off + static const ReshapingKind kind = +#if defined(EIGEN_HAS_INDEX_LIST) + (NumOutputDims == 2 && internal::index_statically_eq(/*index=*/0, /*value=*/1)) ? OneByN + : (NumOutputDims == 2 && internal::index_statically_eq(/*index=*/1, /*value=*/1)) ? NByOne + : Runtime; +#else + Runtime; +#endif + // clang-format on + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + // For trivial reshapes with raw access to underlying data we will provide + // zero overhead block access. + // TODO(ezhulenev): Consider adding block access without raw access? 
+ BlockAccess = TensorEvaluator::RawAccess && + NumInputDims > 0 && NumOutputDims > 0, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef + typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dimensions(op.dimensions()) + { + // The total size of the reshaped tensor must be equal to the total size + // of the input tensor. + eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType data, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(data, std::move(done)); + } +#endif + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return internal::TensorBlockResourceRequirements::any(); + } + + // required in block(OutputTensorBlock* output_block) const + // For C++03 compatibility this must be defined outside the method + struct BlockIteratorState { + Index stride; + Index span; + Index size; + Index count; + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + eigen_assert(m_impl.data() != NULL); + eigen_assert((kind == Runtime) || + (kind == OneByN && desc.dimensions()[0] == 1) || + (kind == NByOne && desc.dimensions()[1] == 1)); + + if (kind == OneByN || kind == NByOne) { + // We can guarantee at compile time that block is just a contiguous slice + // of the underlying expression memory buffer. + return TensorBlock(internal::TensorBlockKind::kView, + m_impl.data() + desc.offset(), desc.dimensions()); + } else { + // This will do additional runtime checks, and in the end it might be also + // a view, or it might be a block materialized in the temporary buffer. 
+ return TensorBlock::materialize(m_impl.data(), m_dimensions, desc, + scratch); + } + } + + EIGEN_DEVICE_FUNC typename Storage::Type data() const { + return constCast(m_impl.data()); + } + + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } + + #ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } + #endif + protected: + TensorEvaluator m_impl; + NewDimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> + +{ + typedef TensorEvaluator, Device> Base; + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::RawAccess, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor + TensorBlockDesc; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlockDesc& desc, const TensorBlock& block) { + assert(this->m_impl.data() != NULL); + + typedef typename TensorBlock::XprType TensorBlockExpr; + typedef internal::TensorBlockAssignment< + Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index> + TensorBlockAssign; + + TensorBlockAssign::Run( + TensorBlockAssign::target(desc.dimensions(), + internal::strides(this->dimensions()), + this->m_impl.data(), desc.offset()), + block.expr()); + } +}; + + +/** \class TensorSlicing + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor slicing class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSlicingOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSlicingOp type; +}; + +} // end namespace internal + + + +template +class TensorSlicingOp : public TensorBase > +{ + public: + typedef TensorBase > Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) + : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_indices; } + EIGEN_DEVICE_FUNC + const Sizes& sizes() const { return m_sizes; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp) + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_indices; + const Sizes m_sizes; +}; + + +// Fixme: figure out the exact threshold +namespace { +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } + EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const { + const bool prefer_block_evaluation = BlockAccess && total > 32*1024; + return !prefer_block_evaluation && contiguous > threshold_; + } + + private: + Index threshold_; +}; + +// It is very expensive to start the memcpy kernel on GPU: we therefore only +// use it for large copies. +#ifdef EIGEN_USE_GPU +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } + EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; } +}; +#endif + +// It is very expensive to start the memcpy kernel on GPU: we therefore only +// use it for large copies. 
+#ifdef EIGEN_USE_SYCL +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { } + EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; } +}; +#endif + +} + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + typedef StorageMemory Storage; + typedef StorageMemory::type, Device> ConstCastStorage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess && + // FIXME: Temporary workaround for bug in slicing of bool tensors. + !internal::is_same::type, bool>::value, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + // Tensor slicing does not change the block type. + typedef typename TensorEvaluator::TensorBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) + { + m_is_identity = true; + for (int i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_impl.dimensions()[i] >= + op.sizes()[i] + op.startIndices()[i]); + if (m_impl.dimensions()[i] != op.sizes()[i] || + op.startIndices()[i] != 0) { + m_is_identity = false; + } + } + + // No strides for scalars. + if (NumDims == 0) return; + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const Sizes& output_dims = op.sizes(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); + } + } else { + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + + // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i] > 0 ? 
m_outputStrides[i] : 1); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (!NumTraits::type>::RequireInitialization + && data && m_impl.data()) { + Index contiguous_values = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } else { + for (int i = NumDims-1; i >= 0; --i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } + // Use memcpy if it's going to be faster than using the regular evaluation. + const MemcpyTriggerForSlicing trigger(m_device); + if (trigger(internal::array_prod(dimensions()), contiguous_values)) { + EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data(); + for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { + Index offset = srcCoeff(i); + m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar)); + } + return false; + } + } + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType /*data*/, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if (m_is_identity) { + return m_impl.coeff(index); + } else { + return m_impl.coeff(srcCoeff(index)); + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = PacketType::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); + + if (m_is_identity) { + return m_impl.template packet(index); + } + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; 
+ values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + EIGEN_UNROLL_LOOP + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + const size_t target_size = m_device.lastLevelCacheSize(); + return internal::TensorBlockResourceRequirements::merge( + internal::TensorBlockResourceRequirements::skewed(target_size), + m_impl.getResourceRequirements()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset())); + TensorBlock block = m_impl.block(arg_desc, scratch); + if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer(); + return block; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { + typename Storage::Type result = constCast(m_impl.data()); + if (result) { + Index offset = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i+1; j < NumDims; ++j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i-1; j >= 0; --j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } + return result + offset; + } + return NULL; + } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[0]); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[NumDims-1]); + } + return inputIndex; + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Device EIGEN_DEVICE_REF m_device; + Dimensions m_dimensions; + bool m_is_identity; + const StartIndices m_offsets; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar 
Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = (NumDims == 1) & TensorEvaluator::RawAccess + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + if (this->m_is_identity) { + return this->m_impl.coeffRef(index); + } else { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + if (this->m_is_identity) { + this->m_impl.template writePacket(index, x); + return; + } + + const int packetSize = PacketType::size; + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; + const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[0]); + inputIndices[1] += (indices[1] + this->m_offsets[0]); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; + const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + this->m_impl.template writePacket(inputIndices[0], x); + } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + this->m_impl.coeffRef(inputIndices[0]) = values[0]; + this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; + EIGEN_UNROLL_LOOP + for (int i = 1; i < packetSize-1; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlockDesc& desc, const TensorBlock& block) { + TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset())); + this->m_impl.writeBlock(arg_desc, block); + } +}; + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + 
typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorStridingSlicingOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorStridingSlicingOp type; +}; + +} // end namespace internal + + +template +class TensorStridingSlicingOp : public TensorBase > +{ + public: + typedef TensorBase > Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( + const XprType& expr, const StartIndices& startIndices, + const StopIndices& stopIndices, const Strides& strides) + : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), + m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_startIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& stopIndices() const { return m_stopIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp) + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_startIndices; + const StopIndices m_stopIndices; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + typedef Strides Dimensions; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. 
+ IsAligned = false, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), + m_device(device), + m_strides(op.strides()) + { + // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero + DSizes startIndicesClamped, stopIndicesClamped; + for (ptrdiff_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); + if (m_strides[i] > 0) { + startIndicesClamped[i] = + clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); + stopIndicesClamped[i] = + clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); + } else { + /* implies m_strides[i] < 0 by assert */ + startIndicesClamped[i] = + clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); + stopIndicesClamped[i] = + clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); + } + m_startIndices[i] = startIndicesClamped[i]; + } + + typedef typename TensorEvaluator::Dimensions InputDimensions; + const InputDimensions& input_dims = m_impl.dimensions(); + + // compute output tensor shape + m_is_identity = true; + for (int i = 0; i < NumDims; i++) { + Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; + if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) { + m_dimensions[i] = 0; + } else { + m_dimensions[i] = + (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0); + eigen_assert(m_dimensions[i] >= 0); + } + if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) { + m_is_identity = false; + } + } + + Strides output_dims = m_dimensions; + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = m_strides[0]; + m_offsets[0] = startIndicesClamped[0]; + Index previousDimProduct = 1; + for (int i = 1; i < NumDims; ++i) { + previousDimProduct *= input_dims[i-1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); + } + } else { + m_inputStrides[NumDims-1] = m_strides[NumDims-1]; + m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; + Index previousDimProduct = 1; + for (int i = NumDims - 2; i >= 0; --i) { + previousDimProduct *= input_dims[i+1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i] > 0 ? 
m_outputStrides[i] : 1); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if (m_is_identity) { + return m_impl.coeff(index); + } else { + return m_impl.coeff(srcCoeff(index)); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { + return NULL; + } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i >= 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } + return inputIndex; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { +#ifndef SYCL_DEVICE_ONLY + return numext::maxi(min, numext::mini(max,value)); +#else + return cl::sycl::clamp(value, min, max); +#endif + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + bool m_is_identity; + TensorEvaluator m_impl; + const Device EIGEN_DEVICE_REF m_device; + DSizes m_startIndices; // clamped startIndices + DSizes m_dimensions; + DSizes m_offsets; // offset in a flattened shape + const Strides m_strides; +}; + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = TensorEvaluator::CoordAccess, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + if (this->m_is_identity) { + return this->m_impl.coeffRef(index); + } else { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + } +}; + + +} // end namespace Eigen + 
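For reference, a minimal usage sketch, assuming the unsupported Tensor module header is on the include path: TensorStridingSlicingOp is normally reached through TensorBase::stridedSlice, and the index clamping done in the evaluator's constructor above is what makes degenerate (empty) intervals legal.

#include <unsupported/Eigen/CXX11/Tensor>

void strided_slice_sketch() {
  Eigen::Tensor<float, 2> t(6, 8);
  t.setRandom();
  Eigen::array<Eigen::Index, 2> start = {{0, 1}};
  Eigen::array<Eigen::Index, 2> stop  = {{6, 7}};
  Eigen::array<Eigen::Index, 2> step  = {{2, 3}};
  // Selects rows 0,2,4 and columns 1,4 of t, i.e. a 3x2 result; stop indices
  // past the end would simply be clamped by the evaluator shown above.
  Eigen::Tensor<float, 2> s = t.stridedSlice(start, stop, step);
}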
+#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h new file mode 100644 index 0000000..ee44382 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -0,0 +1,708 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H +#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H + +namespace Eigen { + +/** \class TensorPadding + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor padding class. + * At the moment only padding with a constant value is supported. + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPaddingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPaddingOp type; +}; + +} // end namespace internal + + + +template +class TensorPaddingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value) + : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + const PaddingDimensions& padding() const { return m_padding_dims; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PaddingDimensions m_padding_dims; + const Scalar m_padding_value; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPaddingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::RawAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = true, + RawAccess = false + }; + + typedef typename internal::remove_const::type 
ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device) + { + // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead + // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector + // of 1 element first and then pad. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Compute dimensions + m_dimensions = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] += m_padding[i].first + m_padding[i].second; + } + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; + } else { + m_inputStrides[NumDims - 1] = 1; + m_outputStrides[NumDims] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; + } + m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(index < dimensions().TotalSize()); + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (isPaddingAtIndexForDim(index, 0)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[0].first); + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i+1]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + if (isPaddingAtIndexForDim(index, NumDims-1)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[NumDims-1].first); + } + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + if 
(static_cast(Layout) == static_cast(ColMajor)) { + return packetColMajor(index); + } + return packetRowMajor(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + TensorOpCost cost = m_impl.costPerCoeff(vectorized); + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims; ++i) + updateCostPerDimension(cost, i, i == 0); + } else { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i >= 0; --i) + updateCostPerDimension(cost, i, i == NumDims - 1); + } + return cost; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + const size_t target_size = m_device.lastLevelCacheSize(); + return internal::TensorBlockResourceRequirements::merge( + internal::TensorBlockResourceRequirements::skewed(target_size), + m_impl.getResourceRequirements()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + // If one of the dimensions is zero, return empty block view. + if (desc.size() == 0) { + return TensorBlock(internal::TensorBlockKind::kView, NULL, + desc.dimensions()); + } + + static const bool IsColMajor = Layout == static_cast(ColMajor); + const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1; + + Index offset = desc.offset(); + + // Compute offsets in the output tensor corresponding to the desc.offset(). + DSizes output_offsets; + for (int i = NumDims - 1; i > 0; --i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + const int stride_dim = IsColMajor ? dim : dim + 1; + output_offsets[dim] = offset / m_outputStrides[stride_dim]; + offset -= output_offsets[dim] * m_outputStrides[stride_dim]; + } + output_offsets[inner_dim_idx] = offset; + + // Offsets in the input corresponding to output offsets. + DSizes input_offsets = output_offsets; + for (int i = 0; i < NumDims; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + input_offsets[dim] = input_offsets[dim] - m_padding[dim].first; + } + + // Compute offset in the input buffer (at this point it might be illegal and + // point outside of the input buffer, because we don't check for negative + // offsets, it will be autocorrected in the block iteration loop below). + Index input_offset = 0; + for (int i = 0; i < NumDims; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + input_offset += input_offsets[dim] * m_inputStrides[dim]; + } + + // Destination buffer and scratch buffer both indexed from 0 and have the + // same dimensions as the requested block (for destination buffer this + // property is guaranteed by `desc.destination()`). + Index output_offset = 0; + const DSizes output_strides = + internal::strides(desc.dimensions()); + + // NOTE(ezhulenev): We initialize bock iteration state for `NumDims - 1` + // dimensions, skipping innermost dimension. In theory it should be possible + // to squeeze matching innermost dimensions, however in practice that did + // not show any improvements in benchmarks. Also in practice first outer + // dimension usually has padding, and will prevent squeezing. + + // Initialize output block iterator state. Dimension in this array are + // always in inner_most -> outer_most order (col major layout). + array it; + for (int i = 0; i < NumDims - 1; ++i) { + const int dim = IsColMajor ? 
i + 1 : NumDims - i - 2; + it[i].count = 0; + it[i].size = desc.dimension(dim); + + it[i].input_stride = m_inputStrides[dim]; + it[i].input_span = it[i].input_stride * (it[i].size - 1); + + it[i].output_stride = output_strides[dim]; + it[i].output_span = it[i].output_stride * (it[i].size - 1); + } + + const Index input_inner_dim_size = + static_cast(m_impl.dimensions()[inner_dim_idx]); + + // Total output size. + const Index output_size = desc.size(); + + // We will fill inner dimension of this size in the output. It might be + // larger than the inner dimension in the input, so we might have to pad + // before/after we copy values from the input inner dimension. + const Index output_inner_dim_size = desc.dimension(inner_dim_idx); + + // How many values to fill with padding BEFORE reading from the input inner + // dimension. + const Index output_inner_pad_before_size = + input_offsets[inner_dim_idx] < 0 + ? numext::mini(numext::abs(input_offsets[inner_dim_idx]), + output_inner_dim_size) + : 0; + + // How many values we can actually copy from the input inner dimension. + const Index output_inner_copy_size = numext::mini( + // Want to copy from input. + (output_inner_dim_size - output_inner_pad_before_size), + // Can copy from input. + numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] + + output_inner_pad_before_size), + Index(0))); + + eigen_assert(output_inner_copy_size >= 0); + + // How many values to fill with padding AFTER reading from the input inner + // dimension. + const Index output_inner_pad_after_size = + (output_inner_dim_size - output_inner_copy_size - + output_inner_pad_before_size); + + // Sanity check, sum of all sizes must be equal to the output size. + eigen_assert(output_inner_dim_size == + (output_inner_pad_before_size + output_inner_copy_size + + output_inner_pad_after_size)); + + // Keep track of current coordinates and padding in the output. + DSizes output_coord = output_offsets; + DSizes output_padded; + for (int i = 0; i < NumDims; ++i) { + const int dim = IsColMajor ? i : NumDims - i - 1; + output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim); + } + + typedef internal::StridedLinearBufferCopy LinCopy; + + // Prepare storage for the materialized padding result. + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage(desc, scratch); + + // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a + // single logical inner dimension. + + // When possible we squeeze writes for the innermost (only if non-padded) + // dimension with the first padded dimension. This allows to reduce the + // number of calls to LinCopy and better utilize vector instructions. + const bool squeeze_writes = + NumDims > 1 && + // inner dimension is not padded + (input_inner_dim_size == m_dimensions[inner_dim_idx]) && + // and equal to the block inner dimension + (input_inner_dim_size == output_inner_dim_size); + + const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1; + + // Maximum coordinate on a squeeze dimension that we can write to. + const Index squeeze_max_coord = + squeeze_writes ? numext::mini( + // max non-padded element in the input + static_cast(m_dimensions[squeeze_dim] - + m_padding[squeeze_dim].second), + // max element in the output buffer + static_cast(output_offsets[squeeze_dim] + + desc.dimension(squeeze_dim))) + : static_cast(0); + + // Iterate copying data from `m_impl.data()` to the output buffer. 
+ for (Index size = 0; size < output_size;) { + // Detect if we are in the padded region (exclude innermost dimension). + bool is_padded = false; + for (int j = 1; j < NumDims; ++j) { + const int dim = IsColMajor ? j : NumDims - j - 1; + is_padded = output_padded[dim]; + if (is_padded) break; + } + + if (is_padded) { + // Fill single innermost dimension with padding value. + size += output_inner_dim_size; + + LinCopy::template Run( + typename LinCopy::Dst(output_offset, 1, block_storage.data()), + typename LinCopy::Src(0, 0, &m_paddingValue), + output_inner_dim_size); + + + } else if (squeeze_writes) { + // Squeeze multiple reads from innermost dimensions. + const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim]; + size += output_inner_dim_size * squeeze_num; + + // Copy `squeeze_num` inner dimensions from input to output. + LinCopy::template Run( + typename LinCopy::Dst(output_offset, 1, block_storage.data()), + typename LinCopy::Src(input_offset, 1, m_impl.data()), + output_inner_dim_size * squeeze_num); + + // Update iteration state for only `squeeze_num - 1` processed inner + // dimensions, because we have another iteration state update at the end + // of the loop that will update iteration state for the last inner + // processed dimension. + it[0].count += (squeeze_num - 1); + input_offset += it[0].input_stride * (squeeze_num - 1); + output_offset += it[0].output_stride * (squeeze_num - 1); + output_coord[squeeze_dim] += (squeeze_num - 1); + + } else { + // Single read from innermost dimension. + size += output_inner_dim_size; + + { // Fill with padding before copying from input inner dimension. + const Index out = output_offset; + + LinCopy::template Run( + typename LinCopy::Dst(out, 1, block_storage.data()), + typename LinCopy::Src(0, 0, &m_paddingValue), + output_inner_pad_before_size); + } + + { // Copy data from input inner dimension. + const Index out = output_offset + output_inner_pad_before_size; + const Index in = input_offset + output_inner_pad_before_size; + + eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL); + + LinCopy::template Run( + typename LinCopy::Dst(out, 1, block_storage.data()), + typename LinCopy::Src(in, 1, m_impl.data()), + output_inner_copy_size); + } + + { // Fill with padding after copying from input inner dimension. + const Index out = output_offset + output_inner_pad_before_size + + output_inner_copy_size; + + LinCopy::template Run( + typename LinCopy::Dst(out, 1, block_storage.data()), + typename LinCopy::Src(0, 0, &m_paddingValue), + output_inner_pad_after_size); + } + } + + for (int j = 0; j < NumDims - 1; ++j) { + const int dim = IsColMajor ? 
j + 1 : NumDims - j - 2; + + if (++it[j].count < it[j].size) { + input_offset += it[j].input_stride; + output_offset += it[j].output_stride; + output_coord[dim] += 1; + output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim); + break; + } + it[j].count = 0; + input_offset -= it[j].input_span; + output_offset -= it[j].output_span; + output_coord[dim] -= it[j].size - 1; + output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim); + } + } + + return block_storage.AsTensorMaterializedBlock(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + private: + struct BlockIteratorState { + BlockIteratorState() + : count(0), + size(0), + input_stride(0), + input_span(0), + output_stride(0), + output_span(0) {} + + Index count; + Index size; + Index input_stride; + Index input_span; + Index output_stride; + Index output_span; + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( + Index index, int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return (!internal::index_pair_first_statically_eq(dim_index, 0) && + index < m_padding[dim_index].first) || + (!internal::index_pair_second_statically_eq(dim_index, 0) && + index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#else + return (index < m_padding[dim_index].first) || + (index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_first_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_second_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + + void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { + const double in = static_cast(m_impl.dimensions()[i]); + const double out = in + m_padding[i].first + m_padding[i].second; + if (out == 0) + return; + const double reduction = in / out; + cost *= reduction; + if (first) { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + reduction * (1 * TensorOpCost::AddCost())); + } else { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + reduction * (2 * TensorOpCost::MulCost() + + 1 * TensorOpCost::DivCost())); + } + } + + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index firstIdx = index; + const Index lastIdx = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; + const Index lastPaddedRight = m_outputStrides[i+1]; + + if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) { + // all the coefficient are in 
the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index lastIdx = index + PacketSize - 1; + const Index firstIdx = index; + const Index lastPaddedLeft = m_padding[0].first; + const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); + const Index lastPaddedRight = m_outputStrides[1]; + + if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[0].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index firstIdx = index; + const Index lastIdx = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; + const Index lastPaddedRight = m_outputStrides[i]; + + if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. 
+ const Index idx = index / m_outputStrides[i+1]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index lastIdx = index + PacketSize - 1; + const Index firstIdx = index; + const Index lastPaddedLeft = m_padding[NumDims-1].first; + const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); + const Index lastPaddedRight = m_outputStrides[NumDims-1]; + + if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[NumDims-1].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + PaddingDimensions m_padding; + + Scalar m_paddingValue; + + const Device EIGEN_DEVICE_REF m_device; +}; + + + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h new file mode 100644 index 0000000..413d25d --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -0,0 +1,291 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H + +namespace Eigen { + +/** \class TensorPatch + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor patch class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPatchOp type; +}; + +} // end namespace internal + + + +template +class TensorPatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) + : m_xpr(expr), m_patch_dims(patch_dims) {} + + EIGEN_DEVICE_FUNC + const PatchDim& patch_dims() const { return m_patch_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PatchDim m_patch_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPatchOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value + 1; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + Index num_patches = 1; + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const PatchDim& patch_dims = op.patch_dims(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i] = patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[NumDims-1] = num_patches; + + m_inputStrides[0] = 1; + m_patchStrides[0] = 1; + for (int i = 1; i < NumDims-1; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); + } + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i+1] = 
patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[0] = num_patches; + + m_inputStrides[NumDims-2] = 1; + m_patchStrides[NumDims-2] = 1; + for (int i = NumDims-3; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); + } + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; + // Find the location of the first element of the patch. + Index patchIndex = index / m_outputStrides[output_stride_index]; + // Find the offset of the element wrt the location of the first element. + Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i]; + patchOffset -= offsetIdx * m_outputStrides[i]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i+1]; + patchOffset -= offsetIdx * m_outputStrides[i+1]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } + inputIndex += (patchIndex + patchOffset); + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? 
NumDims - 1 : 0; + Index indices[2] = {index, index + PacketSize - 1}; + Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], + indices[1] / m_outputStrides[output_stride_index]}; + Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], + indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; + + Index inputIndices[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], + patchOffsets[1] / m_outputStrides[i]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], + patchOffsets[1] / m_outputStrides[i+1]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } + inputIndices[0] += (patchIndices[0] + patchOffsets[0]); + inputIndices[1] += (patchIndices[1] + patchOffsets[1]); + + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + EIGEN_UNROLL_LOOP + for (int i = 1; i < PacketSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (TensorOpCost::DivCost() + + TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + array m_patchStrides; + + TensorEvaluator m_impl; + +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 0000000..37c1d1c --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,322 @@ +// This file is part of Eigen, a lightweight C++ template library 
+// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// Copyright (C) 2018 Mehdi Goli Codeplay Software Ltd. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#if defined(EIGEN_GPU_COMPILE_PHASE) + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. + gpu_assert(threadIdx.z == 0); + return blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); +#else + // Rely on Eigen's random implementation. + return random(); +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) { + // TODO: Unify with the implementation in the non blocking thread pool. + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + (stream << 1 | 1); + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ? seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state, uint64_t stream) { + unsigned rnd = PCG_XSH_RS_generator(state, stream); + return static_cast(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform(uint64_t* state, uint64_t stream) { + // Generate 10 random bits for the mantissa, merge with exponent. + unsigned rnd = PCG_XSH_RS_generator(state, stream); + const uint16_t half_bits = static_cast(rnd & 0x3ffu) | (static_cast(15) << 10); + Eigen::half result = Eigen::numext::bit_cast(half_bits); + // Return the final result + return result - Eigen::half(1.0f); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::bfloat16 RandomToTypeUniform(uint64_t* state, uint64_t stream) { + + // Generate 7 random bits for the mantissa, merge with exponent. 
+ unsigned rnd = PCG_XSH_RS_generator(state, stream); + const uint16_t half_bits = static_cast(rnd & 0x7fu) | (static_cast(127) << 7); + Eigen::bfloat16 result = Eigen::numext::bit_cast(half_bits); + // Return the final result + return result - Eigen::bfloat16(1.0f); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform(uint64_t* state, uint64_t stream) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa mantissa + const unsigned rnd = PCG_XSH_RS_generator(state, stream); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform(uint64_t* state, uint64_t stream) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state, stream) & 0xfffffu; + // The generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state, stream); + result.raw = (static_cast(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeUniform(state, stream), + RandomToTypeUniform(state, stream)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeUniform(state, stream), + RandomToTypeUniform(state, stream)); +} + +template class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + #ifdef EIGEN_USE_SYCL + // In SYCL it is not possible to build PCG_XSH_RS_state in one step. + // Therefor, we need two step to initializate the m_state. + // IN SYCL, the constructor of the functor is s called on the CPU + // and we get the clock seed here from the CPU. However, This seed is + //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. + // and only available on the Operator() function (which is called on the GPU). + // Thus for CUDA (((CLOCK + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread + // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds + // the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction + // similar to CUDA Therefore, the thread Id injection is not available at this stage. + //However when the operator() is called the thread ID will be avilable. So inside the opeator, + // we add the thrreadID, BlockId,... (which is equivalent of i) + //to the seed and construct the unique m_state per thead similar to cuda. 
+ m_exec_once =false; + #endif + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + #ifdef EIGEN_USE_SYCL + m_exec_once =other.m_exec_once; + #endif + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + #ifdef EIGEN_USE_SYCL + if(!m_exec_once) { + // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread + // The (i * 6364136223846793005ULL) is the remaining part of the PCG_XSH_RS_state on the GPU side + m_state += (i * 6364136223846793005ULL); + m_exec_once =true; + } + #endif + T result = RandomToTypeUniform(&m_state, i); + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + #ifdef EIGEN_USE_SYCL + if(!m_exec_once) { + // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread + m_state += (i * 6364136223846793005ULL); + m_exec_once =true; + } + #endif + EIGEN_UNROLL_LOOP + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform(&m_state, i); + } + return internal::pload(values); + } + + private: + mutable uint64_t m_state; + #ifdef EIGEN_USE_SYCL + mutable bool m_exec_once; + #endif +}; + +template +struct functor_traits > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). + Cost = 12 * NumTraits::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator::PacketAccess + }; +}; + + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state, uint64_t stream) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. + T u, v, q; + do { + u = RandomToTypeUniform(state, stream); + v = T(1.7156) * (RandomToTypeUniform(state, stream) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeNormal(state, stream), + RandomToTypeNormal(state, stream)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeNormal(state, stream), + RandomToTypeNormal(state, stream)); +} + + +template class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + #ifdef EIGEN_USE_SYCL + // In SYCL it is not possible to build PCG_XSH_RS_state in one step. + // Therefor, we need two steps to initializate the m_state. + // IN SYCL, the constructor of the functor is s called on the CPU + // and we get the clock seed here from the CPU. However, This seed is + //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. + // and only available on the Operator() function (which is called on the GPU). 
+ // Therefore, the thread Id injection is not available at this stage. However when the operator() + //is called the thread ID will be avilable. So inside the opeator, + // we add the thrreadID, BlockId,... (which is equivalent of i) + //to the seed and construct the unique m_state per thead similar to cuda. + m_exec_once =false; + #endif + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; +#ifdef EIGEN_USE_SYCL + m_exec_once=other.m_exec_once; +#endif + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + #ifdef EIGEN_USE_SYCL + if(!m_exec_once) { + // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread + m_state += (i * 6364136223846793005ULL); + m_exec_once =true; + } + #endif + T result = RandomToTypeNormal(&m_state, i); + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + #ifdef EIGEN_USE_SYCL + if(!m_exec_once) { + // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread + m_state += (i * 6364136223846793005ULL); + m_exec_once =true; + } + #endif + EIGEN_UNROLL_LOOP + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal(&m_state, i); + } + return internal::pload(values); + } + + private: + mutable uint64_t m_state; + #ifdef EIGEN_USE_SYCL + mutable bool m_exec_once; + #endif +}; + + +template +struct functor_traits > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits >::Cost + + 15 * NumTraits::AddCost + 8 * NumTraits::AddCost + + 3 * functor_traits >::Cost / 2, + PacketAccess = NormalRandomGenerator::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h new file mode 100644 index 0000000..583f462 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -0,0 +1,998 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +// clang is incompatible with the CUDA syntax wrt making a kernel a class friend, +// so we'll use a macro to make clang happy. +#ifndef KERNEL_FRIEND +#if defined(__clang__) && (defined(__CUDA__) || defined(__HIP__)) +#define KERNEL_FRIEND friend __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 +#else +#define KERNEL_FRIEND friend +#endif +#endif + + +namespace Eigen { + + +/** \class TensorReduction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reduction class. 
+ * + */ + +namespace internal { + template class MakePointer_ > + struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprTraits::Scalar Scalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; + + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorReductionOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorReductionOp type; +}; + + +template struct DimInitializer { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, + const array::value>& reduced, + OutputDims* output_dims, ReducedDims* reduced_dims) { + const int NumInputDims = internal::array_size::value; + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + (*reduced_dims)[reduceIndex] = input_dims[i]; + ++reduceIndex; + } else { + (*output_dims)[outputIndex] = input_dims[i]; + ++outputIndex; + } + } + } +}; + +template <> struct DimInitializer > { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, const array&, + Sizes<>*, array* reduced_dims) { + const int NumInputDims = internal::array_size::value; + for (int i = 0; i < NumInputDims; ++i) { + (*reduced_dims)[i] = input_dims[i]; + } + } +}; + + +template +struct are_inner_most_dims { + static const bool value = false; +}; +template +struct preserve_inner_most_dims { + static const bool value = false; +}; + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, 0); + static const bool tmp3 = index_statically_eq(array_size::value-1, array_size::value-1); + static const bool value = tmp1 & tmp2 & tmp3; +}; +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, NumTensorDims - array_size::value); + static const bool tmp3 = index_statically_eq(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & tmp2 & tmp3; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_gt(0, 0); + static const bool value = tmp1 & tmp2; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_lt(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & tmp2; +}; +#endif + + +template +struct GenericDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + GenericDimReducer::reduce(self, input, reducer, accum); + } + } +}; 
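The GenericDimReducer helper above, together with the DimIndex 0 and -1 specializations that follow, walks the reduced dimensions recursively and feeds each input coefficient to the reducer; at the user level this machinery sits behind the dimension-wise reduction methods of TensorBase. A minimal usage sketch, assuming the unsupported Tensor module header is available:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> t(4, 5, 6);
  t.setRandom();
  // Sum over dimensions 1 and 2; the surviving dimension 0 gives a rank-1
  // result of size 4. Other reducers (maximum, mean, ...) follow the same pattern.
  Eigen::array<Eigen::Index, 2> dims = {{1, 2}};
  Eigen::Tensor<float, 1> s = t.sum(dims);
  return 0;
}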
+template +struct GenericDimReducer<0, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + for (int j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reduce(self.m_impl.coeff(input), accum); + } + } +}; +template +struct GenericDimReducer<-1, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { + reducer.reduce(self.m_impl.coeff(index), accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalize(accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + const typename Self::Index packetSize = internal::unpacket_traits::size; + const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; + typename Self::PacketReturnType paccum = reducer.template initializePacket(); + for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { + reducer.reducePacket(self.m_impl.template packet(firstIndex + j), &paccum); + } + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalizeBoth(accum, paccum); + } +}; + +#if !defined(EIGEN_HIPCC) +static const int kLeafSize = 1024; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType + reduce(const Self& self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer) { + typename Self::CoeffReturnType accum = reducer.initialize(); + if (numValuesToReduce > kLeafSize) { + const typename Self::Index half = numValuesToReduce / 2; + reducer.reduce(reduce(self, firstIndex, half, reducer), &accum); + reducer.reduce( + reduce(self, firstIndex + half, numValuesToReduce - half, reducer), + &accum); + } else { + for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + } + return reducer.finalize(accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType + reduce(const Self& self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer) { + const typename Self::Index packetSize = + internal::unpacket_traits::size; + typename Self::CoeffReturnType accum = reducer.initialize(); + if (numValuesToReduce > packetSize * kLeafSize) { + // Make sure the split point is aligned on a packet boundary. 
+ const typename Self::Index split = + packetSize * + divup(firstIndex + divup(numValuesToReduce, typename Self::Index(2)), + packetSize); + const typename Self::Index num_left = + numext::mini(split - firstIndex, numValuesToReduce); + reducer.reduce(reduce(self, firstIndex, num_left, reducer), &accum); + if (num_left < numValuesToReduce) { + reducer.reduce( + reduce(self, split, numValuesToReduce - num_left, reducer), &accum); + } + return reducer.finalize(accum); + } else { + const typename Self::Index UnrollSize = + (numValuesToReduce / (2*packetSize)) * 2*packetSize; + const typename Self::Index VectorizedSize = + (numValuesToReduce / packetSize) * packetSize; + typename Self::PacketReturnType paccum = + reducer.template initializePacket(); + typename Self::PacketReturnType paccum2 = + reducer.template initializePacket(); + for (typename Self::Index j = 0; j < UnrollSize; j += packetSize * 2) { + reducer.reducePacket( + self.m_impl.template packet(firstIndex + j), &paccum); + reducer.reducePacket( + self.m_impl.template packet(firstIndex + j + packetSize), + &paccum2); + } + for (typename Self::Index j = UnrollSize; j < VectorizedSize; j+= packetSize) { + reducer.reducePacket(self.m_impl.template packet( + firstIndex + j), &paccum); + } + reducer.reducePacket(paccum2, &paccum); + for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; + ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalizeBoth(accum, paccum); + } + } +}; +#endif + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + InnerMostDimPreserver::reduce(self, input, reducer, accum); + } + } +}; + +template +struct InnerMostDimPreserver<0, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reducePacket(self.m_impl.template packet(input), accum); + } + } +}; +template +struct InnerMostDimPreserver<-1, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +// Default full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = false; + + static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::EvaluatorPointerType output) { + const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + } +}; + + +#ifdef EIGEN_USE_THREADS +// Multithreaded full reducers +template +struct FullReducerShard { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& 
self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer, + typename Self::CoeffReturnType* output) { + *output = InnerMostDimReducer::reduce( + self, firstIndex, numValuesToReduce, reducer); + } +}; + +// Multithreaded full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful; + static const Index PacketSize = + unpacket_traits::size; + + // launch one reducer per thread and accumulate the result. + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, + typename Self::CoeffReturnType* output) { + typedef typename Self::Index Index; + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + if (num_coeffs == 0) { + *output = reducer.finalize(reducer.initialize()); + return; + } + const TensorOpCost cost = + self.m_impl.costPerCoeff(Vectorizable) + + TensorOpCost(0, 0, internal::functor_traits::Cost, Vectorizable, + PacketSize); + const int num_threads = TensorCostModel::numThreads( + num_coeffs, cost, device.numThreads()); + if (num_threads == 1) { + *output = + InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + return; + } + const Index blocksize = + std::floor(static_cast(num_coeffs) / num_threads); + const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); + + Barrier barrier(internal::convert_index(numblocks)); + MaxSizeVector shards(numblocks, reducer.initialize()); + for (Index i = 0; i < numblocks; ++i) { + device.enqueue_with_barrier(&barrier, &FullReducerShard::run, + self, i * blocksize, blocksize, reducer, + &shards[i]); + } + typename Self::CoeffReturnType finalShard; + if (numblocks * blocksize < num_coeffs) { + finalShard = InnerMostDimReducer::reduce( + self, numblocks * blocksize, num_coeffs - numblocks * blocksize, + reducer); + } else { + finalShard = reducer.initialize(); + } + barrier.Wait(); + + for (Index i = 0; i < numblocks; ++i) { + reducer.reduce(shards[i], &finalShard); + } + *output = reducer.finalize(finalShard); + } +}; + +#endif + + +// Default inner reducer +template +struct InnerReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + +// Default outer reducer +template +struct OuterReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + +#ifdef EIGEN_USE_SYCL +// Default Generic reducer +template +struct GenericReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; +#endif + +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*, unsigned int*); + + +#if defined(EIGEN_HAS_GPU_FP16) +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits::type*); +template +__global__ 
EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits::type*); +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(R, const S, I_, I_, half*); + +#endif + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*); + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*); +#endif + +/** + * For SYCL, the return type of the reduction is deduced from the initialize method of the given Op. + * This allows the reduction to have a different type for the accumulator than the input data type. + * If this is the case, the functor needs to have two reduce method: one for reducing an element of the input + * with the accumulator and the other for reducing two accumulators. + * Such a reducer can be useful for instance when the accumulator is a boolean or a bitset that checks for + * some properties of the input. + */ +template +struct ReductionReturnType { +#if defined(EIGEN_USE_SYCL) + typedef typename remove_const().initialize())>::type type; +#else + typedef typename remove_const::type type; +#endif +}; + +} // end namespace internal + + +template class MakePointer_> +class TensorReductionOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) + { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const XprType& expression() const { return m_expr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dims& dims() const { return m_dims; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Op& reducer() const { return m_reducer; } + + protected: + typename XprType::Nested m_expr; + const Dims m_dims; + const Op m_reducer; +}; + +template +struct TensorReductionEvaluatorBase; + +// Eval as rvalue +template class MakePointer_, typename Device> +struct TensorReductionEvaluatorBase, Device> +{ + typedef internal::reducer_traits ReducerTraits; + typedef Dims ReducedDims; + typedef TensorReductionOp XprType; + typedef typename XprType::Index Index; + typedef ArgType ChildType; + typedef typename TensorEvaluator::Dimensions InputDimensions; + static const int NumInputDims = internal::array_size::value; + static const int NumReducedDims = internal::array_size::value; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename internal::conditional, DSizes >::type Dimensions; + typedef typename XprType::Scalar Scalar; + typedef TensorReductionEvaluatorBase, Device> Self; + static const bool InputPacketAccess = TensorEvaluator::PacketAccess; + typedef typename internal::ReductionReturnType::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const Index PacketSize = PacketType::size; + + typedef typename Eigen::internal::traits::PointerType TensorPointerType; + typedef StorageMemory 
Storage; + typedef typename Storage::Type EvaluatorPointerType; + + // Subset of strides of the input tensor for the non-reduced dimensions. + // Indexed by output dimensions. + static const int NumPreservedStrides = max_n_1::size; + + enum { + IsAligned = false, + PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess, + BlockAccess = false, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; + static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; + static const bool RunningFullReduction = (NumOutputDims==0); + + EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) + { + EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Build the bitmap indicating if an input dimension is reduced or not. + for (int i = 0; i < NumInputDims; ++i) { + m_reduced[i] = false; + } + for (int i = 0; i < NumReducedDims; ++i) { + eigen_assert(op.dims()[i] >= 0); + eigen_assert(op.dims()[i] < NumInputDims); + m_reduced[op.dims()[i]] = true; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + internal::DimInitializer::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims); + + // Precompute output strides. + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } else { + m_outputStrides[NumOutputDims - 1] = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } + } + + // Precompute input strides. + if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } + } else { + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (m_reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + m_output_to_input_dim_map[outputIndex] = i; + ++outputIndex; + } + } + } + + // Special case for full reductions + if (NumOutputDims == 0) { + m_preservedStrides[0] = internal::array_prod(input_dims); + } + + m_numValuesToReduce = + NumOutputDims == 0 + ? internal::array_prod(input_dims) + : (static_cast(Layout) == static_cast(ColMajor)) + ? 
m_preservedStrides[0] + : m_preservedStrides[NumOutputDims - 1]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE + bool evalSubExprsIfNeededCommon(EvaluatorPointerType data) { + // Use the FullReducer if possible. + if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction && + internal::FullReducer::HasOptimizedImplementation && + ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || + !RunningOnGPU))) { + bool need_assign = false; + if (!data) { + m_result = static_cast(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType)))); + data = m_result; + need_assign = true; + } + Op reducer(m_reducer); + internal::FullReducer::run(*this, reducer, m_device, data); + return need_assign; + } + + // Attempt to use an optimized reduction. + else if ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || (RunningOnSycl)) { + bool reducing_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + reducing_inner_dims &= m_reduced[i]; + } else { + reducing_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } + } + if (internal::InnerReducer::HasOptimizedImplementation && + (reducing_inner_dims || ReducingInnerMostDims)) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) || (RunningOnSycl)) { + data = static_cast(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve))); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + // For SYCL this if always return false + if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate_temp(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + + bool preserving_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + preserving_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } else { + preserving_inner_dims &= m_reduced[i]; + } + } + if (internal::OuterReducer::HasOptimizedImplementation && + preserving_inner_dims) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) || (RunningOnSycl)) { + data = static_cast(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve))); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + // For SYCL this if always return false + if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate_temp(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + #if defined(EIGEN_USE_SYCL) + // If there is no Optimised version for SYCL, the reduction expression + // must break into two subexpression and use the SYCL generic Reducer on the device. 
+ if(RunningOnSycl) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + data = static_cast(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve))); + m_result = data; + } + Op reducer(m_reducer); + internal::GenericReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + return (m_result != NULL); + } + #endif + } + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE + void + evalSubExprsIfNeededAsync(EvaluatorPointerType data, + EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(NULL, [this, data, done](bool) { + done(evalSubExprsIfNeededCommon(data)); + }); + } +#endif + + EIGEN_STRONG_INLINE + bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + m_impl.evalSubExprsIfNeeded(NULL); + return evalSubExprsIfNeededCommon(data); + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + if (m_result) { + m_device.deallocate_temp(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if (( RunningFullReduction || RunningOnGPU) && m_result ) { + return *(m_result + index); + } + Op reducer(m_reducer); + if (ReducingInnerMostDims || RunningFullReduction) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + return internal::InnerMostDimReducer::reduce(*this, firstInput(index), + num_values_to_reduce, reducer); + } else { + typename Self::CoeffReturnType accum = reducer.initialize(); + internal::GenericDimReducer::reduce(*this, firstInput(index), reducer, &accum); + return reducer.finalize(accum); + } + } + + // TODO(bsteiner): provide a more efficient implementation. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); + + if (RunningOnGPU && m_result) { + return internal::pload(m_result + index); + } + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + if (ReducingInnerMostDims) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + const Index firstIndex = firstInput(index); + for (Index i = 0; i < PacketSize; ++i) { + Op reducer(m_reducer); + values[i] = internal::InnerMostDimReducer::reduce(*this, firstIndex + i * num_values_to_reduce, + num_values_to_reduce, reducer); + } + } else if (PreservingInnerMostDims) { + const Index firstIndex = firstInput(index); + const int innermost_dim = (static_cast(Layout) == static_cast(ColMajor)) ? 0 : NumOutputDims - 1; + // TBD: extend this the the n innermost dimensions that we preserve. 
+ if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) { + Op reducer(m_reducer); + typename Self::PacketReturnType accum = reducer.template initializePacket(); + internal::InnerMostDimPreserver::reduce(*this, firstIndex, reducer, &accum); + return reducer.finalizePacket(accum); + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + // Must be called after evalSubExprsIfNeeded(). + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + if (RunningFullReduction && m_result) { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } else { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const double compute_cost = num_values_to_reduce * internal::functor_traits::Cost; + return m_impl.costPerCoeff(vectorized) * num_values_to_reduce + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } + EIGEN_DEVICE_FUNC const Device& device() const { return m_device; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + m_result.bind(cgh); + } +#endif + + private: + template friend struct internal::GenericDimReducer; + template friend struct internal::InnerMostDimReducer; + template friend struct internal::InnerMostDimPreserver; + template friend struct internal::FullReducer; +#ifdef EIGEN_USE_THREADS + template friend struct internal::FullReducerShard; +#endif +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + template KERNEL_FRIEND void internal::FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*, unsigned int*); +#if defined(EIGEN_HAS_GPU_FP16) + template KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits::type*); + template KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits::type*); + template KERNEL_FRIEND void internal::InnerReductionKernelHalfFloat(R, const S, I_, I_, half*); +#endif + template KERNEL_FRIEND void internal::InnerReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*); + + template KERNEL_FRIEND void internal::OuterReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*); +#endif + +#if defined(EIGEN_USE_SYCL) + template < typename Evaluator_, typename Op__> friend class TensorSycl::internal::GenericNondeterministicReducer; + // SYCL need the Generic reducer for the case the recution algorithm is neither inner, outer, and full reducer + template friend struct internal::GenericReducer; +#endif + + + template friend struct internal::InnerReducer; + + struct BlockIteratorState { + Index input_dim; + Index output_size; + Index output_count; + }; + + // Returns the Index in the input tensor of the first value that needs to be + // used to compute the reduction at output index "index". 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + if (ReducingInnerMostDims) { + if (static_cast(Layout) == static_cast(ColMajor)) { + return index * m_preservedStrides[0]; + } else { + return index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + // TBD: optimize the case where we preserve the innermost dimensions. + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumOutputDims - 1; i > 0; --i) { + // This is index_i in the output tensor. + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[0] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[0]; + } + } else { + for (int i = 0; i < NumOutputDims - 1; ++i) { + // This is index_i in the output tensor. + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + return startInput; + } + + // Bitmap indicating if an input dimension is reduced or not. + array m_reduced; + // Dimensions of the output of the operation. + Dimensions m_dimensions; + // Precomputed strides for the output tensor. + array m_outputStrides; + array, NumOutputDims> m_fastOutputStrides; + array m_preservedStrides; + // Map from output to input dimension index. + array m_output_to_input_dim_map; + // How many values go into each reduction + Index m_numValuesToReduce; + + // Subset of strides of the input tensor for the reduced dimensions. + // Indexed by reduced dimensions. + array m_reducedStrides; + // Size of the input dimensions that are reduced. + // Indexed by reduced dimensions. + array m_reducedDims; + + // Evaluator for the input expression. + TensorEvaluator m_impl; + + // Operation to apply for computing the reduction. 
+ Op m_reducer; + + // For full reductions +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + static const bool RunningOnGPU = internal::is_same::value; + static const bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static const bool RunningOnGPU = false; + static const bool RunningOnSycl = false; +#endif + EvaluatorPointerType m_result; + + const Device EIGEN_DEVICE_REF m_device; +}; + +template class MakePointer_, typename Device> +struct TensorEvaluator, Device> +: public TensorReductionEvaluatorBase, Device> { + typedef TensorReductionEvaluatorBase, Device> Base; + EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Device& device) : Base(op, device){} +}; + + +template class MakePointer_> +struct TensorEvaluator, Eigen::SyclDevice> +: public TensorReductionEvaluatorBase, Eigen::SyclDevice> { + + typedef TensorReductionEvaluatorBase, Eigen::SyclDevice> Base; + EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Eigen::SyclDevice& device) : Base(op, device){} + // The coeff function in the base the recursive method which is not an standard layout and cannot be used in the SYCL kernel + //Therefore the coeff function should be overridden by for SYCL kernel + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::CoeffReturnType coeff(typename Base::Index index) const { + return *(this->data() + index); + } + // The packet function in the base the recursive method which is not an standard layout and cannot be used in the SYCL kernel + //Therefore the packet function should be overridden by for SYCL kernel + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::PacketReturnType packet(typename Base::Index index) const { + return internal::pload(this->data() + index); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h new file mode 100644 index 0000000..68780cd --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -0,0 +1,6 @@ + +#if defined(__clang__) || defined(__GNUC__) +#warning "Deprecated header file, please either include the main Eigen/CXX11/Tensor header or the respective TensorReductionGpu.h file" +#endif + +#include "TensorReductionGpu.h" diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h new file mode 100644 index 0000000..db4e8d8 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h @@ -0,0 +1,966 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H + +namespace Eigen { +namespace internal { + + +#if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple gpu thread to safely accumulate at the same +// output address. 
It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another gpu thread +// updated the content of the output address it will try again. +template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300) + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + gpu_assert(0 && "Wordsize not supported"); + } +#else // EIGEN_CUDA_ARCH >= 300 + gpu_assert(0 && "Shouldn't be called on unsupported device"); +#endif // EIGEN_CUDA_ARCH >= 300 +} + +// We extend atomicExch to support extra data types +template +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} + +#ifdef EIGEN_HAS_GPU_FP16 +template +__device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reducePacket(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reducePacket(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } +} +// reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations +template +__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) { + half2* houtput=reinterpret_cast(output); + half2* haccum=reinterpret_cast(&accum); + for(int i=0;i<4;++i){ + atomicReduce(houtput+i,*(haccum+i),reducer); + } +} +#endif // EIGEN_HAS_GPU_FP16 + +template <> +__device__ inline void atomicReduce(float* output, float accum, SumReducer&) { +#if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300) + atomicAdd(output, accum); +#else // EIGEN_CUDA_ARCH >= 300 + gpu_assert(0 && "Shouldn't be called on unsupported device"); +#endif // EIGEN_CUDA_ARCH >= 300 +} + + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) { + const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const Index num_threads = blockDim.x * gridDim.x; + for 
(Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { + output[i] = val; + } +} + + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, + typename Self::CoeffReturnType* output, unsigned int* semaphore) { +#if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300) + // Initialize the output value + const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; + if (gridDim.x == 1) { + if (first_index == 0) { + *output = reducer.initialize(); + } + } + else { + if (threadIdx.x == 0) { + unsigned int block = atomicCAS(semaphore, 0u, 1u); + if (block == 0) { + // We're the first block to run, initialize the output value + atomicExchCustom(output, reducer.initialize()); + __threadfence(); + atomicExch(semaphore, 2u); + } + else { + // Wait for the first block to initialize the output value. + // Use atomicCAS here to ensure that the reads aren't cached + unsigned int val; + do { + val = atomicCAS(semaphore, 2u, 2u); + } + while (val < 2u); + } + } + } + + __syncthreads(); + + eigen_assert(gridDim.x == 1 || *semaphore >= 2u); + + typename Self::CoeffReturnType accum = reducer.initialize(); + Index max_iter = numext::mini(num_coeffs - first_index, NumPerThread*BlockSize); + for (Index i = 0; i < max_iter; i+=BlockSize) { + const Index index = first_index + i; + eigen_assert(index < num_coeffs); + typename Self::CoeffReturnType val = input.m_impl.coeff(index); + reducer.reduce(val, &accum); + } + +#pragma unroll + for (int offset = warpSize/2; offset > 0; offset /= 2) { + #if defined(EIGEN_HIPCC) + // use std::is_floating_point to determine the type of reduced_val + // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error + // and list the float and int versions of __shfl_down as the candidate functions. 
+ if (std::is_floating_point::value) { + reducer.reduce(__shfl_down(static_cast(accum), offset, warpSize), &accum); + } else { + reducer.reduce(__shfl_down(static_cast(accum), offset, warpSize), &accum); + } + #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); + #else + reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum); + #endif + } + + if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(output, accum, reducer); + } + + if (gridDim.x > 1 && threadIdx.x == 0) { + // Let the last block reset the semaphore + atomicInc(semaphore, gridDim.x + 1); +#if defined(EIGEN_HIPCC) + __threadfence_system(); +#endif + } +#else // EIGEN_CUDA_ARCH >= 300 + gpu_assert(0 && "Shouldn't be called on unsupported device"); +#endif // EIGEN_CUDA_ARCH >= 300 +} + + +#ifdef EIGEN_HAS_GPU_FP16 +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, + packet_traits::type* scratch) { + eigen_assert(blockDim.x == 1); + eigen_assert(gridDim.x == 1); + typedef packet_traits::type packet_type; + Index packet_remainder = + num_coeffs % Index(unpacket_traits::size); + if (packet_remainder != 0) { + half2* h2scratch = reinterpret_cast(scratch); + for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) { + *h2scratch = + __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1)); + h2scratch++; + } + if ((num_coeffs & 1) != 0) { + half lastCoeff = input.m_impl.coeff(num_coeffs - 1); + *h2scratch = __halves2half2(lastCoeff, reducer.initialize()); + } + } else { + *scratch = reducer.template initializePacket(); + } +} + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) { + const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const Index num_threads = blockDim.x * gridDim.x; + typedef typename packet_traits::type PacketType; + + const Index num_packets = + num_coeffs / Index(unpacket_traits::size); + PacketType* p_output = reinterpret_cast(output); + for (Index i = thread_id; i < num_packets; i += num_threads) { + p_output[i] = reducer.template initializePacket(); + } + Index packet_remainder = + num_coeffs % Index(unpacket_traits::size); + if (thread_id < packet_remainder) { + output[num_coeffs - packet_remainder + thread_id] = reducer.initialize(); + } +} + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, + half* output, packet_traits::type* scratch) { + typedef typename packet_traits::type PacketType; + const int packet_width = unpacket_traits::size; + eigen_assert(NumPerThread % packet_width == 0); + const Index first_index = + blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x; + + // Initialize the output value if it wasn't initialized by the ReductionInitKernel + + if (gridDim.x == 1) { + if (first_index == 0) { + int rem = num_coeffs % packet_width; + if (rem != 0) { + half2* p_scratch = reinterpret_cast(scratch); + *scratch = reducer.template initializePacket(); + for (int i = 0; i < rem / 2; i++) { + *p_scratch = __halves2half2( + input.m_impl.coeff(num_coeffs - packet_width + 2 * i), + input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1)); + p_scratch++; + } + if ((num_coeffs & 1) != 0) { + half last = input.m_impl.coeff(num_coeffs - 1); + *p_scratch = __halves2half2(last, 
reducer.initialize()); + } + } else { + *scratch = reducer.template initializePacket(); + } + } + __syncthreads(); + } + + PacketType accum = reducer.template initializePacket(); + const Index max_iter = + numext::mini((num_coeffs - first_index) / packet_width, + NumPerThread * BlockSize / packet_width); + for (Index i = 0; i < max_iter; i += BlockSize) { + const Index index = first_index + packet_width * i; + eigen_assert(index + packet_width < num_coeffs); + PacketType val = input.m_impl.template packet(index); + reducer.reducePacket(val, &accum); + } + +#pragma unroll + for (int offset = warpSize/2; offset > 0; offset /= 2) { + #if defined(EIGEN_HIPCC) + PacketType r1; + half2* hr = reinterpret_cast(&r1); + half2* hacc = reinterpret_cast(&accum); + for (int i = 0; i < packet_width / 2; i++) { + // FIXME : remove this workaround once we have native half/half2 support for __shfl_down + union { int i; half2 h; } wka_in, wka_out; + wka_in.h = hacc[i]; + wka_out.i = __shfl_down(wka_in.i, offset, warpSize); + hr[i] = wka_out.h; + } + reducer.reducePacket(r1, &accum); + #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + PacketType r1; + half2* hr = reinterpret_cast(&r1); + half2* hacc = reinterpret_cast(&accum); + for (int i = 0; i < packet_width / 2; i++) { + hr[i] = __shfl_down(hacc[i], offset, warpSize); + } + reducer.reducePacket(r1, &accum); + #else + PacketType r1; + half2* hr = reinterpret_cast(&r1); + half2* hacc = reinterpret_cast(&accum); + for (int i = 0; i < packet_width / 2; i++) { + hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize); + } + reducer.reducePacket(r1, &accum); + + #endif + } + + if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(scratch, accum, reducer); + } + + __syncthreads(); + half2* rv1 = reinterpret_cast(scratch); + if (packet_width > 2) { + reducer.reducePacket(rv1[2], rv1); + reducer.reducePacket(rv1[3], rv1 + 1); + reducer.reducePacket(rv1[1], rv1); + } + if (gridDim.x == 1) { + if (first_index == 0) { + half tmp = __low2half(*rv1); + reducer.reduce(__high2half(*rv1), &tmp); + *output = tmp; + } + } +} + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits::type* scratch) { + eigen_assert(threadIdx.x == 1); + half2* pscratch = reinterpret_cast(scratch); + half tmp = __float2half(0.f); + typedef packet_traits::type packet_type; + for (int i = 0; i < unpacket_traits::size; i += 2) { + reducer.reduce(__low2half(*pscratch), &tmp); + reducer.reduce(__high2half(*pscratch), &tmp); + pscratch++; + } + *output = tmp; +} + +#endif // EIGEN_HAS_GPU_FP16 + +template +struct FullReductionLauncher { + static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) { + gpu_assert(false && "Should only be called on doubles, floats and half floats"); + } +}; + +// Specialization for float and double +template +struct FullReductionLauncher< + Self, Op, OutputType, PacketAccess, + typename internal::enable_if< + internal::is_same::value || + internal::is_same::value, + void>::type> { + static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) { + + typedef typename Self::Index Index; + const int block_size = 256; + const int num_per_thread = 128; + const int num_blocks = divup(num_coeffs, block_size * num_per_thread); + + unsigned int* semaphore = NULL; + if (num_blocks > 1) { + semaphore = device.semaphore(); + } + + LAUNCH_GPU_KERNEL((FullReductionKernel), + 
num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore); + } +}; + +#ifdef EIGEN_HAS_GPU_FP16 +template +struct FullReductionLauncher { + static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) { + gpu_assert(false && "Should not be called since there is no packet accessor"); + } +}; + +template +struct FullReductionLauncher { + static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) { + typedef typename Self::Index Index; + typedef typename packet_traits::type PacketType; + + const int block_size = 256; + const int num_per_thread = 128; + const int num_blocks = divup(num_coeffs, block_size * num_per_thread); + PacketType* scratch = static_cast(device.scratchpad()); + // half2* scratch = static_cast(device.scratchpad()); + + if (num_blocks > 1) { + // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there + // won't be a race conditions between multiple thread blocks. + LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat), + 1, 1, 0, device, reducer, self, num_coeffs, scratch); + } + + LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat), + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch); + + if (num_blocks > 1) { + LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat), + 1, 1, 0, device, reducer, output, scratch); + } + } +}; +#endif // EIGEN_HAS_GPU_FP16 + + +template +struct FullReducer { + // Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple cases + // of doubles, floats and half floats +#ifdef EIGEN_HAS_GPU_FP16 + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && + (internal::is_same::value || + internal::is_same::value || + (internal::is_same::value && reducer_traits::PacketAccess)); +#else // EIGEN_HAS_GPU_FP16 + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && + (internal::is_same::value || + internal::is_same::value); +#endif // EIGEN_HAS_GPU_FP16 + + template + static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { + gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + // Don't crash when we're called with an input tensor of size 0. 
+ if (num_coeffs == 0) { + return; + } + + FullReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs); + } +}; + + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, + typename Self::CoeffReturnType* output) { +#if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300) + typedef typename Self::CoeffReturnType Type; + eigen_assert(blockDim.y == 1); + eigen_assert(blockDim.z == 1); + eigen_assert(gridDim.y == 1); + eigen_assert(gridDim.z == 1); + + const int unroll_times = 16; + eigen_assert(NumPerThread % unroll_times == 0); + + const Index input_col_blocks = divup(num_coeffs_to_reduce, blockDim.x * NumPerThread); + const Index num_input_blocks = input_col_blocks * num_preserved_coeffs; + + const Index num_threads = blockDim.x * gridDim.x; + const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + // Initialize the output values if they weren't initialized by the ReductionInitKernel + if (gridDim.x == 1) { + for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { + output[i] = reducer.initialize(); + } + __syncthreads(); + } + + for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { + const Index row = i / input_col_blocks; + + if (row < num_preserved_coeffs) { + const Index col_block = i % input_col_blocks; + const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x; + + Type reduced_val = reducer.initialize(); + + for (Index j = 0; j < NumPerThread; j += unroll_times) { + const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1); + if (last_col >= num_coeffs_to_reduce) { + for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) { + const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col); + reducer.reduce(val, &reduced_val); + } + break; + } else { + // Faster version of the loop with no branches after unrolling. +#pragma unroll + for (int k = 0; k < unroll_times; ++k) { + const Index col = col_begin + blockDim.x * (j + k); + reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val); + } + } + } + +#pragma unroll + for (int offset = warpSize/2; offset > 0; offset /= 2) { + #if defined(EIGEN_HIPCC) + // use std::is_floating_point to determine the type of reduced_val + // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error + // and list the float and int versions of __shfl_down as the candidate functions. 
+ if (std::is_floating_point::value) { + reducer.reduce(__shfl_down(static_cast(reduced_val), offset), &reduced_val); + } else { + reducer.reduce(__shfl_down(static_cast(reduced_val), offset), &reduced_val); + } + #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val); + #else + reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val); + #endif + } + + if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(&(output[row]), reduced_val, reducer); + } + } + } +#else // EIGEN_CUDA_ARCH >= 300 + gpu_assert(0 && "Shouldn't be called on unsupported device"); +#endif // EIGEN_CUDA_ARCH >= 300 +} + +#ifdef EIGEN_HAS_GPU_FP16 + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, + half* output) { + eigen_assert(blockDim.y == 1); + eigen_assert(blockDim.z == 1); + eigen_assert(gridDim.y == 1); + eigen_assert(gridDim.z == 1); + + typedef typename packet_traits::type PacketType; + const int packet_width = unpacket_traits::size; + const int unroll_times = 16 / packet_width; + eigen_assert(NumPerThread % unroll_times == 0); + eigen_assert(unroll_times % 2 == 0); + + const Index input_col_blocks = divup(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2); + const Index num_input_blocks = divup(input_col_blocks * num_preserved_coeffs, 2); + + const Index num_threads = blockDim.x * gridDim.x; + const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + // Initialize the output values if they weren't initialized by the ReductionInitKernel + if (gridDim.x == 1) { + Index i = packet_width * thread_id; + for (; i + packet_width <= num_preserved_coeffs; + i += packet_width * num_threads) { + PacketType* poutput = reinterpret_cast(output + i); + *poutput = reducer.template initializePacket(); + } + if (i < num_preserved_coeffs) { + output[i] = reducer.initialize(); + } + __syncthreads(); + } + + for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { + const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows + + if (row + 1 < num_preserved_coeffs) { + const Index col_block = i % input_col_blocks; + const Index col_begin = + packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x); + + PacketType reduced_val1 = reducer.template initializePacket(); + PacketType reduced_val2 = reducer.template initializePacket(); + + for (Index j = 0; j < NumPerThread; j += unroll_times) { + const Index last_col = + col_begin + blockDim.x * (j + unroll_times - 1) * packet_width; + if (last_col >= num_coeffs_to_reduce) { + Index col = col_begin + blockDim.x * j; + for (; col + packet_width <= num_coeffs_to_reduce; + col += blockDim.x) { + const PacketType val1 = input.m_impl.template packet( + row * num_coeffs_to_reduce + col); + reducer.reducePacket(val1, &reduced_val1); + const PacketType val2 = input.m_impl.template packet( + (row + 1) * num_coeffs_to_reduce + col); + reducer.reducePacket(val2, &reduced_val2); + } + if (col < num_coeffs_to_reduce) { + PacketType r1 = reducer.template initializePacket(); + PacketType r2 = reducer.template initializePacket(); + half2* hr1 = reinterpret_cast(&r1); + half2* hr2 = reinterpret_cast(&r2); + while (col + 1 < num_coeffs_to_reduce) { + *hr1 = __halves2half2( + input.m_impl.coeff(row * num_coeffs_to_reduce + col), + input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1)); + *hr2 = __halves2half2( + input.m_impl.coeff((row 
+ 1) * num_coeffs_to_reduce + col), + input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col + + 1)); + hr1++; + hr2++; + col += 2; + } + if (col < num_coeffs_to_reduce) { + // Peel; + const half last1 = + input.m_impl.coeff(row * num_coeffs_to_reduce + col); + *hr1 = __halves2half2(last1, reducer.initialize()); + const half last2 = + input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col); + *hr2 = __halves2half2(last2, reducer.initialize()); + } + reducer.reducePacket(r1, &reduced_val1); + reducer.reducePacket(r2, &reduced_val2); + } + break; + } else { + // Faster version of the loop with no branches after unrolling. +#pragma unroll + for (int k = 0; k < unroll_times; ++k) { + const Index col = col_begin + blockDim.x * (j + k) * packet_width; + reducer.reducePacket(input.m_impl.template packet( + row * num_coeffs_to_reduce + col), + &reduced_val1); + reducer.reducePacket(input.m_impl.template packet( + (row + 1) * num_coeffs_to_reduce + col), + &reduced_val2); + } + } + } + +#pragma unroll + for (int offset = warpSize/2; offset > 0; offset /= 2) { + #if defined(EIGEN_HIPCC) + PacketType r1; + PacketType r2; + half2* hr1 = reinterpret_cast(&r1); + half2* hr2 = reinterpret_cast(&r2); + half2* rv1 = reinterpret_cast(&reduced_val1); + half2* rv2 = reinterpret_cast(&reduced_val2); + for (int i = 0; i < packet_width / 2; i++) { + // FIXME : remove this workaround once we have native half/half2 support for __shfl_down + union { int i; half2 h; } wka_in1, wka_out1; + wka_in1.h = rv1[i]; + wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize); + hr1[i] = wka_out1.h; + + union { int i; half2 h; } wka_in2, wka_out2; + wka_in2.h = rv2[i]; + wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize); + hr2[i] = wka_out2.h; + } + reducer.reducePacket(r1, &reduced_val1); + reducer.reducePacket(r2, &reduced_val2); + #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + PacketType r1; + PacketType r2; + half2* hr1 = reinterpret_cast(&r1); + half2* hr2 = reinterpret_cast(&r2); + half2* rv1 = reinterpret_cast(&reduced_val1); + half2* rv2 = reinterpret_cast(&reduced_val2); + for (int i = 0; i < packet_width / 2; i++) { + hr1[i] = __shfl_down(rv1[i], offset, warpSize); + hr2[i] = __shfl_down(rv2[i], offset, warpSize); + } + reducer.reducePacket(r1, &reduced_val1); + reducer.reducePacket(r2, &reduced_val2); + #else + PacketType r1; + PacketType r2; + half2* hr1 = reinterpret_cast(&r1); + half2* hr2 = reinterpret_cast(&r2); + half2* rr1 = reinterpret_cast(&reduced_val1); + half2* rr2 = reinterpret_cast(&reduced_val2); + for (int i = 0; i < packet_width / 2; i++) { + hr1[i] = + __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize); + hr2[i] = + __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize); + } + reducer.reducePacket(r1, &reduced_val1); + reducer.reducePacket(r2, &reduced_val2); + + #endif + } + half2* rv1 = reinterpret_cast(&reduced_val1); + half2* rv2 = reinterpret_cast(&reduced_val2); + half2 val; + if (packet_width > 2) { + reducer.reducePacket(rv1[2], rv1); + reducer.reducePacket(rv1[3], rv1 + 1); + reducer.reducePacket(rv1[1], rv1); + reducer.reducePacket(rv2[2], rv2); + reducer.reducePacket(rv2[3], rv2 + 1); + reducer.reducePacket(rv2[1], rv2); + } + half val1 = __low2half(*rv1); + reducer.reduce(__high2half(*rv1), &val1); + half val2 = __low2half(*rv2); + reducer.reduce(__high2half(*rv2), &val2); + val = __halves2half2(val1, val2); + if ((threadIdx.x & (warpSize - 1)) == 0) { + half* loc = output + row; + atomicReduce((half2*)loc, val, reducer); + } + } + } +} 
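+// Illustrative summary (editorial comment, not upstream code): the GPU reduction
+// kernels above all share the same warp-level pattern. Each thread first folds a
+// strided slice of the input into a private accumulator, the lanes of a warp are
+// then combined with a shuffle-down tree, and lane 0 of every warp publishes its
+// total through atomicReduce(). For the float/double kernels this is essentially
+//
+//   for (int offset = warpSize / 2; offset > 0; offset /= 2)
+//     reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
+//   if ((threadIdx.x & (warpSize - 1)) == 0)
+//     atomicReduce(output, accum, reducer);
+//
+// The half-precision variants do the same, but shuffle half2 lanes and fall back
+// to the pre-CUDA-9 / HIP workarounds shown above where __shfl_down_sync is not
+// available.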
+ +#endif // EIGEN_HAS_GPU_FP16 + +template +struct InnerReductionLauncher { + static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) { + gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device"); + return true; + } +}; + +// Specialization for float and double +template +struct InnerReductionLauncher< + Self, Op, OutputType, PacketAccess, + typename internal::enable_if< + internal::is_same::value || + internal::is_same::value, + void>::type> { + static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { + typedef typename Self::Index Index; + + const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; + const int block_size = 256; + const int num_per_thread = 128; + const int dyn_blocks = divup(num_coeffs, block_size * num_per_thread); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / block_size; + const int num_blocks = numext::mini(max_blocks, dyn_blocks); + + if (num_blocks > 1) { + // We initialize the outputs outside the reduction kernel when we can't be sure that there + // won't be a race conditions between multiple thread blocks. + const int dyn_blocks = divup(num_preserved_vals, 1024); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / 1024; + const int num_blocks = numext::mini(max_blocks, dyn_blocks); + LAUNCH_GPU_KERNEL((ReductionInitKernel), + num_blocks, 1024, 0, device, reducer.initialize(), + num_preserved_vals, output); + } + + LAUNCH_GPU_KERNEL((InnerReductionKernel), + num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); + + return false; + } +}; + +#ifdef EIGEN_HAS_GPU_FP16 +template +struct InnerReductionLauncher { + static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) { + gpu_assert(false && "Should not be called since there is no packet accessor"); + return true; + } +}; + +template +struct InnerReductionLauncher { + static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { + typedef typename Self::Index Index; + + if (num_preserved_vals % 2 != 0) { + // Not supported yet, revert to the slower code path + return true; + } + + const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; + const int block_size = /*256*/128; + const int num_per_thread = /*128*/64; + const int dyn_blocks = divup(num_coeffs, block_size * num_per_thread); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / block_size; + const int num_blocks = numext::mini(max_blocks, dyn_blocks); + + if (num_blocks > 1) { + // We initialize the outputs outside the reduction kernel when we can't be sure that there + // won't be a race conditions between multiple thread blocks. 
+ LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat), + 1, 1, 0, device, reducer, self, num_preserved_vals, output); + } + + LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat), + num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); + + return false; + } +}; +#endif // EIGEN_HAS_GPU_FP16 + + +template +struct InnerReducer { + // Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple case + // of floats and half floats. +#ifdef EIGEN_HAS_GPU_FP16 + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && + (internal::is_same::value || + internal::is_same::value || + (internal::is_same::value && reducer_traits::PacketAccess)); +#else // EIGEN_HAS_GPU_FP16 + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && + (internal::is_same::value || + internal::is_same::value); +#endif // EIGEN_HAS_GPU_FP16 + + template + static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { + gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + // Don't crash when we're called with an input tensor of size 0. + if (num_coeffs == 0) { + return true; + } + // It's faster to use the usual code. + if (num_coeffs_to_reduce <= 128) { + return true; + } + + return InnerReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); + } +}; + +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, + typename Self::CoeffReturnType* output) { + const Index num_threads = blockDim.x * gridDim.x; + const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; + // Initialize the output values if they weren't initialized by the ReductionInitKernel + if (gridDim.x == 1) { + for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { + output[i] = reducer.initialize(); + } + __syncthreads(); + } + + // Do the reduction. + const Index max_iter = num_preserved_coeffs * divup(num_coeffs_to_reduce, NumPerThread); + for (Index i = thread_id; i < max_iter; i += num_threads) { + const Index input_col = i % num_preserved_coeffs; + const Index input_row = (i / num_preserved_coeffs) * NumPerThread; + typename Self::CoeffReturnType reduced_val = reducer.initialize(); + const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce); + for (Index j = input_row; j < max_row; j++) { + typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col); + reducer.reduce(val, &reduced_val); + } + atomicReduce(&(output[input_col]), reduced_val, reducer); + } +} + + +template +struct OuterReducer { + // Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple case + // of floats. 
+ static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && + (internal::is_same::value || + internal::is_same::value); + template + static + #if !defined(EIGEN_HIPCC) + // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error + // (in the cxx11_tensor_reduction_gpu test) + // + // terminate called after throwing an instance of 'std::runtime_error' + // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL... + // + // don't know why this happens (and why is it a runtime error instead of a compile time error) + // + // this will be fixed by HIP PR#457 + EIGEN_DEVICE_FUNC + #endif + bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) { + gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device"); + return true; + } + + static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { + typedef typename Self::Index Index; + + // It's faster to use the usual code. + if (num_coeffs_to_reduce <= 32) { + return true; + } + + const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; + const int block_size = 256; + const int num_per_thread = 16; + const int dyn_blocks = divup(num_coeffs, block_size * num_per_thread); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / block_size; + const int num_blocks = numext::mini(max_blocks, dyn_blocks); + + if (num_blocks > 1) { + // We initialize the outputs in the reduction kernel itself when we don't have to worry + // about race conditions between multiple thread blocks. + const int dyn_blocks = divup(num_preserved_vals, 1024); + const int max_blocks = device.getNumGpuMultiProcessors() * + device.maxGpuThreadsPerMultiProcessor() / 1024; + const int num_blocks = numext::mini(max_blocks, dyn_blocks); + LAUNCH_GPU_KERNEL((ReductionInitKernel), + num_blocks, 1024, 0, device, reducer.initialize(), + num_preserved_vals, output); + } + + LAUNCH_GPU_KERNEL((OuterReductionKernel), + num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); + + return false; + } +}; + +#endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h new file mode 100644 index 0000000..474eba0 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h @@ -0,0 +1,582 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorReductionSycl.h + * + * \brief: + * This is the specialization of the reduction operation. Two phase reduction approach + * is used since the GPU does not have Global Synchronization for global memory among + * different work-group/thread block. 
To solve the problem, we need to create two kernels + * to reduce the data, where the first kernel reduce the data locally and each local + * workgroup/thread-block save the input data into global memory. In the second phase (global reduction) + * one work-group uses one work-group/thread-block to reduces the intermediate data into one single element. + * Here is an NVIDIA presentation explaining the optimized two phase reduction algorithm on GPU: + * https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf + * + *****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP +namespace Eigen { +namespace TensorSycl { +namespace internal { + +template +struct OpDefiner { + typedef typename Vectorise::PacketReturnType PacketReturnType; + typedef Op type; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op) { return op; } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, + const Index &) { + return accumulator; + } +}; + +template +struct OpDefiner, CoeffReturnType, Index, false> { + typedef Eigen::internal::SumReducer type; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer &) { + return type(); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType finalise_op(const CoeffReturnType &accumulator, + const Index &scale) { + ::Eigen::internal::scalar_quotient_op quotient_op; + return quotient_op(accumulator, CoeffReturnType(scale)); + } +}; + +template +struct OpDefiner, CoeffReturnType, Index, true> { + typedef typename Vectorise::PacketReturnType PacketReturnType; + typedef Eigen::internal::SumReducer type; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer &) { + return type(); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, + const Index &scale) { + return ::Eigen::internal::pdiv(accumulator, ::Eigen::internal::pset1(CoeffReturnType(scale))); + } +}; + +template +struct SecondStepFullReducer { + typedef cl::sycl::accessor + LocalAccessor; + typedef OpDefiner OpDef; + typedef typename OpDef::type Op; + LocalAccessor scratch; + InputAccessor aI; + OutputAccessor outAcc; + Op op; + SecondStepFullReducer(LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_) + : scratch(scratch_), aI(aI_), outAcc(outAcc_), op(OpDef::get_op(op_)) {} + + void operator()(cl::sycl::nd_item<1> itemID) { + // Our empirical research shows that the best performance will be achieved + // when there is only one element per thread to reduce in the second step. + // in this step the second step reduction time is almost negligible. + // Hence, in the second step of reduction the input size is fixed to the + // local size, thus, there is only one element read per thread. The + // algorithm must be changed if the number of reduce per thread in the + // second step is greater than 1. Otherwise, the result will be wrong. 
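+    // Added for clarity: the body below implements that single-element-per-thread second
+    // step as a tree reduction in local memory. Each work-item loads one partial result
+    // from the first phase, then `offset` is halved every iteration and work-items below
+    // `offset` fold scratch[localid + offset] into their accumulator behind a local
+    // barrier; work-item 0 ends up with the final value and writes it out. For a local
+    // range of 8 the pairs combined are (0,4)(1,5)(2,6)(3,7), then (0,2)(1,3), then (0,1).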
+ const Index localid = itemID.get_local_id(0); + auto aInPtr = aI.get_pointer() + localid; + auto aOutPtr = outAcc.get_pointer(); + CoeffReturnType *scratchptr = scratch.get_pointer(); + CoeffReturnType accumulator = *aInPtr; + + scratchptr[localid] = op.finalize(accumulator); + for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (localid < offset) { + op.reduce(scratchptr[localid + offset], &accumulator); + scratchptr[localid] = op.finalize(accumulator); + } + } + if (localid == 0) *aOutPtr = op.finalize(accumulator); + } +}; + +// Full reduction first phase. In this version the vectorization is true and the reduction accept +// any generic reducerOp e.g( max, min, sum, mean, iamax, iamin, etc ). +template +class FullReductionKernelFunctor { + public: + typedef typename Evaluator::CoeffReturnType CoeffReturnType; + typedef typename Evaluator::Index Index; + typedef OpDefiner + OpDef; + + typedef typename OpDef::type Op; + typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType; + typedef typename Evaluator::PacketReturnType PacketReturnType; + typedef + typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess), + PacketReturnType, CoeffReturnType>::type OutType; + typedef cl::sycl::accessor + LocalAccessor; + LocalAccessor scratch; + Evaluator evaluator; + EvaluatorPointerType final_output; + Index rng; + Op op; + + FullReductionKernelFunctor(LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_, + Index rng_, OpType op_) + : scratch(scratch_), evaluator(evaluator_), final_output(final_output_), rng(rng_), op(OpDef::get_op(op_)) {} + + void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if::type compute_reduction( + const cl::sycl::nd_item<1> &itemID) { + auto output_ptr = final_output.get_pointer(); + Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize; + Index globalid = itemID.get_global_id(0); + Index localid = itemID.get_local_id(0); + Index step = Evaluator::PacketSize * itemID.get_global_range(0); + Index start = Evaluator::PacketSize * globalid; + // vectorizable parts + PacketReturnType packetAccumulator = op.template initializePacket(); + for (Index i = start; i < VectorizedRange; i += step) { + op.template reducePacket(evaluator.impl().template packet(i), &packetAccumulator); + } + globalid += VectorizedRange; + // non vectorizable parts + for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) { + op.template reducePacket( + ::Eigen::TensorSycl::internal::PacketWrapper::convert_to_packet_type( + evaluator.impl().coeff(i), op.initialize()), + &packetAccumulator); + } + scratch[localid] = packetAccumulator = + OpDef::finalise_op(op.template finalizePacket(packetAccumulator), rng); + // reduction parts // Local size is always power of 2 + EIGEN_UNROLL_LOOP + for (Index offset = local_range / 2; offset > 0; offset /= 2) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (localid < offset) { + op.template reducePacket(scratch[localid + offset], &packetAccumulator); + scratch[localid] = op.template finalizePacket(packetAccumulator); + } + } + if (localid == 0) { + output_ptr[itemID.get_group(0)] = + op.finalizeBoth(op.initialize(), op.template finalizePacket(packetAccumulator)); + } + } + + template + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if::type compute_reduction( + const cl::sycl::nd_item<1> &itemID) { + auto output_ptr = final_output.get_pointer(); + Index globalid = itemID.get_global_id(0); + Index localid = itemID.get_local_id(0); + // vectorizable parts + CoeffReturnType accumulator = op.initialize(); + // non vectorizable parts + for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) { + op.reduce(evaluator.impl().coeff(i), &accumulator); + } + scratch[localid] = accumulator = OpDef::finalise_op(op.finalize(accumulator), rng); + + // reduction parts. the local size is always power of 2 + EIGEN_UNROLL_LOOP + for (Index offset = local_range / 2; offset > 0; offset /= 2) { + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (localid < offset) { + op.reduce(scratch[localid + offset], &accumulator); + scratch[localid] = op.finalize(accumulator); + } + } + if (localid == 0) { + output_ptr[itemID.get_group(0)] = op.finalize(accumulator); + } + } +}; + +template +class GenericNondeterministicReducer { + public: + typedef typename Evaluator::CoeffReturnType CoeffReturnType; + typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType; + typedef typename Evaluator::Index Index; + typedef OpDefiner OpDef; + typedef typename OpDef::type Op; + template + GenericNondeterministicReducer(Scratch, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType functor_, + Index range_, Index num_values_to_reduce_) + : evaluator(evaluator_), + output_accessor(output_accessor_), + functor(OpDef::get_op(functor_)), + range(range_), + num_values_to_reduce(num_values_to_reduce_) {} + + void operator()(cl::sycl::nd_item<1> itemID) { + auto output_accessor_ptr = output_accessor.get_pointer(); + /// const cast added as a naive solution to solve the qualifier drop error + Index globalid = static_cast(itemID.get_global_linear_id()); + if (globalid < range) { + CoeffReturnType accum = functor.initialize(); + Eigen::internal::GenericDimReducer::reduce( + evaluator, evaluator.firstInput(globalid), functor, &accum); + output_accessor_ptr[globalid] = OpDef::finalise_op(functor.finalize(accum), num_values_to_reduce); + } + } + + private: + Evaluator evaluator; + EvaluatorPointerType output_accessor; + Op functor; + Index range; + Index num_values_to_reduce; +}; + +enum class reduction_dim { inner_most, outer_most }; +// default is preserver +template +struct PartialReductionKernel { + typedef typename Evaluator::CoeffReturnType CoeffReturnType; + typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType; + typedef typename Evaluator::Index Index; + typedef OpDefiner OpDef; + typedef typename OpDef::type Op; + typedef cl::sycl::accessor + ScratchAcc; + ScratchAcc scratch; + Evaluator evaluator; + EvaluatorPointerType output_accessor; + Op op; + const Index preserve_elements_num_groups; + const Index reduce_elements_num_groups; + const Index num_coeffs_to_preserve; + const Index num_coeffs_to_reduce; + + PartialReductionKernel(ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_, + const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_, + const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_) + : scratch(scratch_), + evaluator(evaluator_), + output_accessor(output_accessor_), + op(OpDef::get_op(op_)), + preserve_elements_num_groups(preserve_elements_num_groups_), + reduce_elements_num_groups(reduce_elements_num_groups_), + 
num_coeffs_to_preserve(num_coeffs_to_preserve_), + num_coeffs_to_reduce(num_coeffs_to_reduce_) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId, + CoeffReturnType &accumulator) { + if (globalPId >= num_coeffs_to_preserve) { + return; + } + Index global_offset = rt == reduction_dim::outer_most ? globalPId + (globalRId * num_coeffs_to_preserve) + : globalRId + (globalPId * num_coeffs_to_reduce); + Index localOffset = globalRId; + + const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups; + const Index per_thread_global_stride = + rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride; + for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) { + op.reduce(evaluator.impl().coeff(global_offset), &accumulator); + localOffset += per_thread_local_stride; + global_offset += per_thread_global_stride; + } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + const Index linearLocalThreadId = itemID.get_local_id(0); + Index pLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId % PannelParameters::LocalThreadSizeP + : linearLocalThreadId / PannelParameters::LocalThreadSizeR; + Index rLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId / PannelParameters::LocalThreadSizeP + : linearLocalThreadId % PannelParameters::LocalThreadSizeR; + const Index pGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) % preserve_elements_num_groups + : itemID.get_group(0) / reduce_elements_num_groups; + const Index rGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) / preserve_elements_num_groups + : itemID.get_group(0) % reduce_elements_num_groups; + + Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId; + const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId; + auto scratchPtr = scratch.get_pointer().get(); + auto outPtr = + output_accessor.get_pointer() + (reduce_elements_num_groups > 1 ? rGroupId * num_coeffs_to_preserve : 0); + CoeffReturnType accumulator = op.initialize(); + + element_wise_reduce(globalRId, globalPId, accumulator); + + accumulator = OpDef::finalise_op(op.finalize(accumulator), num_coeffs_to_reduce); + scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] = + accumulator; + if (rt == reduction_dim::inner_most) { + pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP; + rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP; + globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId; + } + + /* Apply the reduction operation between the current local + * id and the one on the other half of the vector. 
*/ + auto out_scratch_ptr = + scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC))); + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (rt == reduction_dim::inner_most) { + accumulator = *out_scratch_ptr; + } + // The Local LocalThreadSizeR is always power of 2 + EIGEN_UNROLL_LOOP + for (Index offset = PannelParameters::LocalThreadSizeR >> 1; offset > 0; offset >>= 1) { + if (rLocalThreadId < offset) { + op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) * offset], &accumulator); + // The result has already been divided for mean reducer in the + // previous reduction so no need to divide furthermore + *out_scratch_ptr = op.finalize(accumulator); + } + /* All threads collectively read from global memory into local. + * The barrier ensures all threads' IO is resolved before + * execution continues (strictly speaking, all threads within + * a single work-group - there is no co-ordination between + * work-groups, only work-items). */ + itemID.barrier(cl::sycl::access::fence_space::local_space); + } + + if (rLocalThreadId == 0 && (globalPId < num_coeffs_to_preserve)) { + outPtr[globalPId] = op.finalize(accumulator); + } + } +}; + +template +struct SecondStepPartialReduction { + typedef OpDefiner OpDef; + typedef typename OpDef::type Op; + typedef cl::sycl::accessor + ScratchAccessor; + InputAccessor input_accessor; + OutputAccessor output_accessor; + Op op; + const Index num_coeffs_to_preserve; + const Index num_coeffs_to_reduce; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SecondStepPartialReduction(ScratchAccessor, InputAccessor input_accessor_, + OutputAccessor output_accessor_, OpType op_, + const Index num_coeffs_to_preserve_, + const Index num_coeffs_to_reduce_) + : input_accessor(input_accessor_), + output_accessor(output_accessor_), + op(OpDef::get_op(op_)), + num_coeffs_to_preserve(num_coeffs_to_preserve_), + num_coeffs_to_reduce(num_coeffs_to_reduce_) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + const Index globalId = itemID.get_global_id(0); + + if (globalId >= num_coeffs_to_preserve) return; + + auto in_ptr = input_accessor.get_pointer() + globalId; + + OutScalar accumulator = op.initialize(); +// num_coeffs_to_reduce is not bigger that 256 + for (Index i = 0; i < num_coeffs_to_reduce; i++) { + op.reduce(*in_ptr, &accumulator); + in_ptr += num_coeffs_to_preserve; + } + output_accessor.get_pointer()[globalId] = op.finalize(accumulator); + } +}; // namespace internal + +template +struct ReductionPannel { + static EIGEN_CONSTEXPR Index LocalThreadSizeP = LTP; + static EIGEN_CONSTEXPR Index LocalThreadSizeR = LTR; + static EIGEN_CONSTEXPR bool BC = BC_; +}; + +template +struct PartialReducerLauncher { + typedef typename Self::EvaluatorPointerType EvaluatorPointerType; + typedef typename Self::CoeffReturnType CoeffReturnType; + typedef typename Self::Storage Storage; + typedef typename Self::Index Index; + typedef ReductionPannel + PannelParameters; + + typedef PartialReductionKernel SyclReducerKerneType; + + static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType output, + Index num_coeffs_to_reduce, Index num_coeffs_to_preserve) { + Index roundUpP = roundUp(num_coeffs_to_preserve, PannelParameters::LocalThreadSizeP); + + // getPowerOfTwo makes sure local range is power of 2 and <= + // maxSyclThreadPerBlock this will help us to avoid extra check on the + // kernel + 
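+    // Added for clarity: the assertion below uses the usual power-of-two test
+    // (x is a power of two iff x & (x - 1) == 0 for x > 0). The tree reductions in the
+    // kernels halve the active range each step, and that only pairs every element exactly
+    // once per step when the local range is a power of two.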
static_assert(!((PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR) & + (PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR - 1)), + "The Local thread size must be a power of 2 for the reduction " + "operation"); + + EIGEN_CONSTEXPR Index localRange = PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR; + // In this step, we force the code not to be more than 2-step reduction: + // Our empirical research shows that if each thread reduces at least 64 + // elemnts individually, we get better performance. However, this can change + // on different platforms. In this step we force the code not to be + // morthan step reduction: Our empirical research shows that for inner_most + // dim reducer, it is better to have 8 group in a reduce dimension for sizes + // > 1024 to achieve the best performance. + const Index reductionPerThread = 64; + Index cu = dev.getPowerOfTwo(dev.getNumSyclMultiProcessors(), true); + const Index pNumGroups = roundUpP / PannelParameters::LocalThreadSizeP; + Index rGroups = (cu + pNumGroups - 1) / pNumGroups; + const Index rNumGroups = num_coeffs_to_reduce > reductionPerThread * localRange ? std::min(rGroups, localRange) : 1; + const Index globalRange = pNumGroups * rNumGroups * localRange; + + EIGEN_CONSTEXPR Index scratchSize = + PannelParameters::LocalThreadSizeR * (PannelParameters::LocalThreadSizeP + PannelParameters::BC); + auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange)); + if (rNumGroups > 1) { + CoeffReturnType *temp_pointer = static_cast( + dev.allocate_temp(num_coeffs_to_preserve * rNumGroups * sizeof(CoeffReturnType))); + EvaluatorPointerType temp_accessor = dev.get(temp_pointer); + dev.template unary_kernel_launcher( + self, temp_accessor, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve, + num_coeffs_to_reduce); + + typedef SecondStepPartialReduction + SecondStepPartialReductionKernel; + + dev.template unary_kernel_launcher( + temp_accessor, output, + cl::sycl::nd_range<1>(cl::sycl::range<1>(pNumGroups * localRange), cl::sycl::range<1>(localRange)), Index(1), + reducer, num_coeffs_to_preserve, rNumGroups); + + self.device().deallocate_temp(temp_pointer); + } else { + dev.template unary_kernel_launcher( + self, output, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve, + num_coeffs_to_reduce); + } + return false; + } +}; +} // namespace internal +} // namespace TensorSycl + +namespace internal { + +template +struct FullReducer { + typedef typename Self::CoeffReturnType CoeffReturnType; + typedef typename Self::EvaluatorPointerType EvaluatorPointerType; + static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true; + static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? 
Self::PacketSize : 1; + static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) { + typedef typename conditional::type OutType; + static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) & + (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)), + "The Local thread size must be a power of 2 for the reduction " + "operation"); + EIGEN_CONSTEXPR Index local_range = EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1; + + typename Self::Index inputSize = self.impl().dimensions().TotalSize(); + // In this step we force the code not to be more than 2-step reduction: + // Our empirical research shows that if each thread reduces at least 512 + // elemnts individually, we get better performance. + const Index reductionPerThread = 2048; + // const Index num_work_group = + Index reductionGroup = dev.getPowerOfTwo( + (inputSize + (reductionPerThread * local_range - 1)) / (reductionPerThread * local_range), true); + const Index num_work_group = std::min(reductionGroup, local_range); + // 1 + // ? local_range + // : 1); + const Index global_range = num_work_group * local_range; + + auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range)); + typedef TensorSycl::internal::FullReductionKernelFunctor reduction_kernel_t; + if (num_work_group > 1) { + CoeffReturnType *temp_pointer = + static_cast(dev.allocate_temp(num_work_group * sizeof(CoeffReturnType))); + typename Self::EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer); + dev.template unary_kernel_launcher(self, tmp_global_accessor, thread_range, + local_range, inputSize, reducer); + + typedef TensorSycl::internal::SecondStepFullReducer + GenericRKernel; + dev.template unary_kernel_launcher( + tmp_global_accessor, data, + cl::sycl::nd_range<1>(cl::sycl::range<1>(num_work_group), cl::sycl::range<1>(num_work_group)), num_work_group, + reducer); + + dev.deallocate_temp(temp_pointer); + } else { + dev.template unary_kernel_launcher(self, data, thread_range, local_range, inputSize, + reducer); + } + } +}; +// vectorizable inner_most most dim preserver +// col reduction +template +struct OuterReducer { + static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true; + + static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, + typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce, + typename Self::Index num_coeffs_to_preserve) { + return ::Eigen::TensorSycl::internal::PartialReducerLauncher< + Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::outer_most>::run(self, reducer, dev, output, + num_coeffs_to_reduce, + num_coeffs_to_preserve); + } +}; +// row reduction +template +struct InnerReducer { + static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true; + + static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, + typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce, + typename Self::Index num_coeffs_to_preserve) { + return ::Eigen::TensorSycl::internal::PartialReducerLauncher< + Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::inner_most>::run(self, reducer, dev, output, + num_coeffs_to_reduce, + num_coeffs_to_preserve); + } +}; + +// ArmgMax uses this kernel for partial reduction// +// TODO(@mehdi.goli) come up with a better kernel +// generic partial reduction +template +struct GenericReducer { + static EIGEN_CONSTEXPR bool HasOptimizedImplementation = false; + static 
bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, + typename Self::EvaluatorPointerType output, typename Self::Index num_values_to_reduce, + typename Self::Index num_coeffs_to_preserve) { + typename Self::Index range, GRange, tileSize; + dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); + + dev.template unary_kernel_launcher>( + self, output, cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), Index(1), + reducer, range, (num_values_to_reduce != 0) ? num_values_to_reduce : static_cast(1)); + return false; + } +}; + +} // namespace internal +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h new file mode 100644 index 0000000..a27d364 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h @@ -0,0 +1,454 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H +#define EIGEN_CXX11_TENSOR_TENSOR_REF_H + +namespace Eigen { + +namespace internal { + +template +class TensorLazyBaseEvaluator { + public: + TensorLazyBaseEvaluator() : m_refcount(0) { } + virtual ~TensorLazyBaseEvaluator() { } + + EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0; + EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0; + + EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0; + EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0; + + void incrRefCount() { ++m_refcount; } + void decrRefCount() { --m_refcount; } + int refCount() const { return m_refcount; } + + private: + // No copy, no assignment; + TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); + TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); + + int m_refcount; +}; + + +template +class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator::Scalar> { + public: + // typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename TensorEvaluator::Scalar Scalar; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + typedef TensorEvaluator EvalType; + + TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { + m_dims = m_impl.dimensions(); + m_impl.evalSubExprsIfNeeded(NULL); + } + virtual ~TensorLazyEvaluatorReadOnly() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const { + return m_dims; + } + EIGEN_DEVICE_FUNC virtual const Scalar* data() const { + return m_impl.data(); + } + + EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const { + return m_impl.coeff(index); + } + EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) { + eigen_assert(false && "can't reference the coefficient of a rvalue"); + return m_dummy; + }; + + protected: + TensorEvaluator m_impl; + Dimensions m_dims; + Scalar m_dummy; +}; + +template +class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly { + public: + typedef TensorLazyEvaluatorReadOnly Base; + typedef typename Base::Scalar Scalar; + typedef StorageMemory 
Storage; + typedef typename Storage::Type EvaluatorPointerType; + + TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) { + } + virtual ~TensorLazyEvaluatorWritable() { + } + + EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { + return this->m_impl.coeffRef(index); + } +}; + +template +class TensorLazyEvaluator : public internal::conditional::value), + TensorLazyEvaluatorWritable, + TensorLazyEvaluatorReadOnly >::type { + public: + typedef typename internal::conditional::value), + TensorLazyEvaluatorWritable, + TensorLazyEvaluatorReadOnly >::type Base; + typedef typename Base::Scalar Scalar; + + TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) { + } + virtual ~TensorLazyEvaluator() { + } +}; + +} // namespace internal + + +/** \class TensorRef + * \ingroup CXX11_Tensor_Module + * + * \brief A reference to a tensor expression + * The expression will be evaluated lazily (as much as possible). + * + */ +template class TensorRef : public TensorBase > +{ + public: + typedef TensorRef Self; + typedef typename PlainObjectType::Base Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + typedef Scalar* PointerType; + typedef PointerType PointerArgType; + + static const Index NumIndices = PlainObjectType::NumIndices; + typedef typename PlainObjectType::Dimensions Dimensions; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = false, + Layout = PlainObjectType::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) { + } + + template + EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator(expr, DefaultDevice())) { + m_evaluator->incrRefCount(); + } + + template + EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) { + unrefEvaluator(); + m_evaluator = new internal::TensorLazyEvaluator(expr, DefaultDevice()); + m_evaluator->incrRefCount(); + return *this; + } + + ~TensorRef() { + unrefEvaluator(); + } + + TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) { + eigen_assert(m_evaluator->refCount() > 0); + m_evaluator->incrRefCount(); + } + + TensorRef& operator = (const TensorRef& other) { + if (this != &other) { + unrefEvaluator(); + m_evaluator = other.m_evaluator; + eigen_assert(m_evaluator->refCount() > 0); + m_evaluator->incrRefCount(); + } + return *this; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); } + + EIGEN_DEVICE_FUNC + 
EIGEN_STRONG_INLINE const Scalar operator()(Index index) const + { + return m_evaluator->coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const + { + const std::size_t num_indices = (sizeof...(otherIndices) + 1); + const array indices{{firstIndex, otherIndices...}}; + return coeff(indices); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + { + const std::size_t num_indices = (sizeof...(otherIndices) + 1); + const array indices{{firstIndex, otherIndices...}}; + return coeffRef(indices); + } +#else + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const + { + array indices; + indices[0] = i0; + indices[1] = i1; + return coeff(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + return coeff(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + return coeff(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + indices[4] = i4; + return coeff(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1) + { + array indices; + indices[0] = i0; + indices[1] = i1; + return coeffRef(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2) + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + return coeffRef(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + return coeffRef(indices); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4) + { + array indices; + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + indices[4] = i4; + return coeffRef(indices); + } +#endif + + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar coeff(const array& indices) const + { + const Dimensions& dims = this->dimensions(); + Index index = 0; + if (PlainObjectType::Options & RowMajor) { + index += indices[0]; + for (size_t i = 1; i < NumIndices; ++i) { + index = index * dims[i] + indices[i]; + } + } else { + index += indices[NumIndices-1]; + for (int i = NumIndices-2; i >= 0; --i) { + index = index * dims[i] + indices[i]; + } + } + return m_evaluator->coeff(index); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + const Dimensions& dims = this->dimensions(); + Index index = 0; + if (PlainObjectType::Options & RowMajor) { + index += indices[0]; + for (size_t i = 1; i < NumIndices; ++i) { + index = index * dims[i] + indices[i]; + } + } else { + index += indices[NumIndices-1]; + for (int i = NumIndices-2; i >= 0; --i) { + index = index * dims[i] + indices[i]; + } + } + return m_evaluator->coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + return 
m_evaluator->coeff(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return m_evaluator->coeffRef(index); + } + + private: + EIGEN_STRONG_INLINE void unrefEvaluator() { + if (m_evaluator) { + m_evaluator->decrRefCount(); + if (m_evaluator->refCount() == 0) { + delete m_evaluator; + } + } + } + + internal::TensorLazyBaseEvaluator* m_evaluator; +}; + + +// evaluator for rvalues +template +struct TensorEvaluator, Device> +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorRef::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef& m, const Device&) + : m_ref(m) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + return true; + } + + EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_ref.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return m_ref.coeffRef(index); + } + + EIGEN_DEVICE_FUNC const Scalar* data() const { return m_ref.data(); } + + protected: + TensorRef m_ref; +}; + + +// evaluator for lvalues +template +struct TensorEvaluator, Device> : public TensorEvaluator, Device> +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + + typedef TensorEvaluator, Device> Base; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + PreferBlockAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(TensorRef& m, const Device& d) : Base(m, d) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return this->m_ref.coeffRef(index); + } +}; + + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h new file mode 100644 index 0000000..586ce68 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -0,0 +1,465 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Navdeep Jaitly +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H +#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H +namespace Eigen { + +/** \class TensorReverse + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reverse elements class. + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorReverseOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorReverseOp type; +}; + +} // end namespace internal + +template +class TensorReverseOp : public TensorBase, WriteAccessors> +{ + public: + typedef TensorBase, WriteAccessors>Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind + StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp( + const XprType& expr, const ReverseDimensions& reverse_dims) + : m_xpr(expr), m_reverse_dims(reverse_dims) { } + + EIGEN_DEVICE_FUNC + const ReverseDimensions& reverse() const { return m_reverse_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp) + + + protected: + typename XprType::Nested m_xpr; + const ReverseDimensions m_reverse_dims; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorReverseOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = NumDims > 0, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + typedef internal::TensorIntDivisor IndexDivisor; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlock + ArgTensorBlock; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), + m_reverse(op.reverse()), + m_device(device) + { + // Reversing a scalar isn't supported yet. 
It would be a no-op anyway. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Compute strides + m_dimensions = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i-1] * m_dimensions[i-1]; + if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]); + } + } else { + m_strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i+1] * m_dimensions[i+1]; + if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex( + Index index) const { + eigen_assert(index < dimensions().TotalSize()); + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + Index idx = index / m_fastStrides[i]; + index -= idx * m_strides[i]; + if (m_reverse[i]) { + idx = m_dimensions[i] - idx - 1; + } + inputIndex += idx * m_strides[i] ; + } + if (m_reverse[0]) { + inputIndex += (m_dimensions[0] - index - 1); + } else { + inputIndex += index; + } + } else { + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + Index idx = index / m_fastStrides[i]; + index -= idx * m_strides[i]; + if (m_reverse[i]) { + idx = m_dimensions[i] - idx - 1; + } + inputIndex += idx * m_strides[i] ; + } + if (m_reverse[NumDims-1]) { + inputIndex += (m_dimensions[NumDims-1] - index - 1); + } else { + inputIndex += index; + } + } + return inputIndex; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( + Index index) const { + return m_impl.coeff(reverseIndex(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + // TODO(ndjaitly): write a better packing routine that uses + // local structure. + EIGEN_ALIGN_MAX typename internal::remove_const::type + values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + const size_t target_size = m_device.lastLevelCacheSize(); + // Block evaluation reads underlying memory in reverse order, and default + // cost model does not properly catch this in bytes stored/loaded. + return internal::TensorBlockResourceRequirements::skewed( + target_size) + .addCostPerCoeff({0, 0, 24}); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + // TODO(ezhulenev): If underlying tensor expression supports and prefers + // block evaluation we must use it. 
Currently we use coeff and packet + // access into the underlying tensor expression. + // static const bool useBlockAccessForArgType = + // TensorEvaluator::BlockAccess && + // TensorEvaluator::PreferBlockAccess; + + static const bool isColMajor = + static_cast(Layout) == static_cast(ColMajor); + + static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1; + const bool inner_dim_reversed = m_reverse[inner_dim_idx]; + + // Offset in the output block. + Index block_offset = 0; + + // Offset in the input Tensor. + Index input_offset = reverseIndex(desc.offset()); + + // Initialize output block iterator state. Dimension in this array are + // always in inner_most -> outer_most order (col major layout). + array it; + for (int i = 0; i < NumDims; ++i) { + const int dim = isColMajor ? i : NumDims - 1 - i; + it[i].size = desc.dimension(dim); + it[i].count = 0; + it[i].reverse = m_reverse[dim]; + + it[i].block_stride = + i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride); + it[i].block_span = it[i].block_stride * (it[i].size - 1); + + it[i].input_stride = m_strides[dim]; + it[i].input_span = it[i].input_stride * (it[i].size - 1); + + if (it[i].reverse) { + it[i].input_stride = -1 * it[i].input_stride; + it[i].input_span = -1 * it[i].input_span; + } + } + + // If multiple inner dimensions have the same reverse flag, check if we can + // merge them into a single virtual inner dimension. + int effective_inner_dim = 0; + for (int i = 1; i < NumDims; ++i) { + if (it[i].reverse != it[effective_inner_dim].reverse) break; + if (it[i].block_stride != it[effective_inner_dim].size) break; + if (it[i].block_stride != numext::abs(it[i].input_stride)) break; + + it[i].size = it[effective_inner_dim].size * it[i].size; + + it[i].block_stride = 1; + it[i].input_stride = (inner_dim_reversed ? -1 : 1); + + it[i].block_span = it[i].block_stride * (it[i].size - 1); + it[i].input_span = it[i].input_stride * (it[i].size - 1); + + effective_inner_dim = i; + } + + eigen_assert(it[effective_inner_dim].block_stride == 1); + eigen_assert(it[effective_inner_dim].input_stride == + (inner_dim_reversed ? -1 : 1)); + + const Index inner_dim_size = it[effective_inner_dim].size; + + // Prepare storage for the materialized reverse result. + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage(desc, scratch); + CoeffReturnType* block_buffer = block_storage.data(); + + while (it[NumDims - 1].count < it[NumDims - 1].size) { + // Copy inner-most dimension data from reversed location in input. + Index dst = block_offset; + Index src = input_offset; + + // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed + // worse results in benchmarks than a simple coefficient loop. + if (inner_dim_reversed) { + for (Index i = 0; i < inner_dim_size; ++i) { + block_buffer[dst] = m_impl.coeff(src); + ++dst; + --src; + } + } else { + for (Index i = 0; i < inner_dim_size; ++i) { + block_buffer[dst] = m_impl.coeff(src); + ++dst; + ++src; + } + } + + // For the 1d tensor we need to generate only one inner-most dimension. + if ((NumDims - effective_inner_dim) == 1) break; + + // Update offset. 
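+      // Added for clarity: the loop below is an odometer-style carry over the dimensions
+      // above the (possibly merged) inner one. Bump a counter; if it has not wrapped,
+      // advance both offsets by that dimension's stride and stop. If it has wrapped,
+      // rewind the offsets by the dimension's full span, reset the counter (the outermost
+      // counter is left saturated so the enclosing while loop terminates) and carry on.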
+ for (Index i = effective_inner_dim + 1; i < NumDims; ++i) { + if (++it[i].count < it[i].size) { + block_offset += it[i].block_stride; + input_offset += it[i].input_stride; + break; + } + if (i != NumDims - 1) it[i].count = 0; + block_offset -= it[i].block_span; + input_offset -= it[i].input_span; + } + } + + return block_storage.AsTensorMaterializedBlock(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + double compute_cost = NumDims * (2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + for (int i = 0; i < NumDims; ++i) { + if (m_reverse[i]) { + compute_cost += 2 * TensorOpCost::AddCost(); + } + } + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); + } + + EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + Dimensions m_dimensions; + array m_strides; + array m_fastStrides; + TensorEvaluator m_impl; + ReverseDimensions m_reverse; + const Device EIGEN_DEVICE_REF m_device; + + private: + struct BlockIteratorState { + BlockIteratorState() + : size(0), + count(0), + reverse(false), + block_stride(0), + block_span(0), + input_stride(0), + input_span(0) {} + + Index size; + Index count; + bool reverse; + Index block_stride; + Index block_span; + Index input_stride; + Index input_span; + }; +}; + +// Eval as lvalue + +template +struct TensorEvaluator, Device> + : public TensorEvaluator, + Device> { + typedef TensorEvaluator, + Device> Base; + typedef TensorReverseOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::value; + typedef DSizes Dimensions; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dimensions& dimensions() const { return this->m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return this->m_impl.coeffRef(this->reverseIndex(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + // This code is pilfered from TensorMorphing.h + EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; + internal::pstore(values, x); + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + this->coeffRef(index+i) = values[i]; + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H diff --git 
a/external/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h new file mode 100644 index 0000000..beae854 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -0,0 +1,528 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Igor Babuschkin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H +#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits { + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorScanOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorScanOp type; +}; +} // end namespace internal + +/** \class TensorScan + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor scan class. + */ +template +class TensorScanOp + : public TensorBase, ReadOnlyAccessors> { +public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( + const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) + : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Index axis() const { return m_axis; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const XprType& expression() const { return m_expr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Op accumulator() const { return m_accumulator; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool exclusive() const { return m_exclusive; } + +protected: + typename XprType::Nested m_expr; + const Index m_axis; + const Op m_accumulator; + const bool m_exclusive; +}; + + +namespace internal { + +template +EIGEN_STRONG_INLINE void ReduceScalar(Self& self, Index offset, + typename Self::CoeffReturnType* data) { + // Compute the scan along the axis, starting at the given offset + typename Self::CoeffReturnType accum = self.accumulator().initialize(); + if (self.stride() == 1) { + if (self.exclusive()) { + for (Index curr = offset; curr < offset + self.size(); ++curr) { + data[curr] = self.accumulator().finalize(accum); + self.accumulator().reduce(self.inner().coeff(curr), &accum); + } + } else { + for (Index curr = offset; curr < offset + self.size(); ++curr) { + self.accumulator().reduce(self.inner().coeff(curr), &accum); + data[curr] = self.accumulator().finalize(accum); + } + } + } else { + if (self.exclusive()) { + for (Index idx3 = 0; idx3 < self.size(); idx3++) { + Index curr = offset + idx3 * self.stride(); + data[curr] = self.accumulator().finalize(accum); + 
self.accumulator().reduce(self.inner().coeff(curr), &accum); + } + } else { + for (Index idx3 = 0; idx3 < self.size(); idx3++) { + Index curr = offset + idx3 * self.stride(); + self.accumulator().reduce(self.inner().coeff(curr), &accum); + data[curr] = self.accumulator().finalize(accum); + } + } + } +} + +template +EIGEN_STRONG_INLINE void ReducePacket(Self& self, Index offset, + typename Self::CoeffReturnType* data) { + using Scalar = typename Self::CoeffReturnType; + using Packet = typename Self::PacketReturnType; + // Compute the scan along the axis, starting at the calculated offset + Packet accum = self.accumulator().template initializePacket(); + if (self.stride() == 1) { + if (self.exclusive()) { + for (Index curr = offset; curr < offset + self.size(); ++curr) { + internal::pstoreu(data + curr, self.accumulator().finalizePacket(accum)); + self.accumulator().reducePacket(self.inner().template packet(curr), &accum); + } + } else { + for (Index curr = offset; curr < offset + self.size(); ++curr) { + self.accumulator().reducePacket(self.inner().template packet(curr), &accum); + internal::pstoreu(data + curr, self.accumulator().finalizePacket(accum)); + } + } + } else { + if (self.exclusive()) { + for (Index idx3 = 0; idx3 < self.size(); idx3++) { + const Index curr = offset + idx3 * self.stride(); + internal::pstoreu(data + curr, self.accumulator().finalizePacket(accum)); + self.accumulator().reducePacket(self.inner().template packet(curr), &accum); + } + } else { + for (Index idx3 = 0; idx3 < self.size(); idx3++) { + const Index curr = offset + idx3 * self.stride(); + self.accumulator().reducePacket(self.inner().template packet(curr), &accum); + internal::pstoreu(data + curr, self.accumulator().finalizePacket(accum)); + } + } + } +} + +template +struct ReduceBlock { + EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1, + typename Self::CoeffReturnType* data) { + for (Index idx2 = 0; idx2 < self.stride(); idx2++) { + // Calculate the starting offset for the scan + Index offset = idx1 + idx2; + ReduceScalar(self, offset, data); + } + } +}; + +// Specialization for vectorized reduction. +template +struct ReduceBlock { + EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1, + typename Self::CoeffReturnType* data) { + using Packet = typename Self::PacketReturnType; + const int PacketSize = internal::unpacket_traits::size; + Index idx2 = 0; + for (; idx2 + PacketSize <= self.stride(); idx2 += PacketSize) { + // Calculate the starting offset for the packet scan + Index offset = idx1 + idx2; + ReducePacket(self, offset, data); + } + for (; idx2 < self.stride(); idx2++) { + // Calculate the starting offset for the scan + Index offset = idx1 + idx2; + ReduceScalar(self, offset, data); + } + } +}; + +// Single-threaded CPU implementation of scan +template ::PacketAccess && + internal::reducer_traits::PacketAccess)> +struct ScanLauncher { + void operator()(Self& self, typename Self::CoeffReturnType* data) { + Index total_size = internal::array_prod(self.dimensions()); + + // We fix the index along the scan axis to 0 and perform a + // scan per remaining entry. The iteration is split into two nested + // loops to avoid an integer division by keeping track of each idx1 and + // idx2. + for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) { + ReduceBlock block_reducer; + block_reducer(self, idx1, data); + } + } +}; + +#ifdef EIGEN_USE_THREADS + +// Adjust block_size to avoid false sharing of cachelines among +// threads. 
Currently set to twice the cache line size on Intel and ARM +// processors. +EIGEN_STRONG_INLINE Index AdjustBlockSize(Index item_size, Index block_size) { + EIGEN_CONSTEXPR Index kBlockAlignment = 128; + const Index items_per_cacheline = + numext::maxi(1, kBlockAlignment / item_size); + return items_per_cacheline * divup(block_size, items_per_cacheline); +} + +template +struct ReduceBlock { + EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1, + typename Self::CoeffReturnType* data) { + using Scalar = typename Self::CoeffReturnType; + using Packet = typename Self::PacketReturnType; + const int PacketSize = internal::unpacket_traits::size; + Index num_scalars = self.stride(); + Index num_packets = 0; + if (self.stride() >= PacketSize) { + num_packets = self.stride() / PacketSize; + self.device().parallelFor( + num_packets, + TensorOpCost(PacketSize * self.size(), PacketSize * self.size(), + 16 * PacketSize * self.size(), true, PacketSize), + // Make the shard size large enough that two neighboring threads + // won't write to the same cacheline of `data`. + [=](Index blk_size) { + return AdjustBlockSize(PacketSize * sizeof(Scalar), blk_size); + }, + [&](Index first, Index last) { + for (Index packet = first; packet < last; ++packet) { + const Index idx2 = packet * PacketSize; + ReducePacket(self, idx1 + idx2, data); + } + }); + num_scalars -= num_packets * PacketSize; + } + self.device().parallelFor( + num_scalars, TensorOpCost(self.size(), self.size(), 16 * self.size()), + // Make the shard size large enough that two neighboring threads + // won't write to the same cacheline of `data`. + [=](Index blk_size) { + return AdjustBlockSize(sizeof(Scalar), blk_size); + }, + [&](Index first, Index last) { + for (Index scalar = first; scalar < last; ++scalar) { + const Index idx2 = num_packets * PacketSize + scalar; + ReduceScalar(self, idx1 + idx2, data); + } + }); + } +}; + +template +struct ReduceBlock { + EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1, + typename Self::CoeffReturnType* data) { + using Scalar = typename Self::CoeffReturnType; + self.device().parallelFor( + self.stride(), TensorOpCost(self.size(), self.size(), 16 * self.size()), + // Make the shard size large enough that two neighboring threads + // won't write to the same cacheline of `data`. + [=](Index blk_size) { + return AdjustBlockSize(sizeof(Scalar), blk_size); + }, + [&](Index first, Index last) { + for (Index idx2 = first; idx2 < last; ++idx2) { + ReduceScalar(self, idx1 + idx2, data); + } + }); + } +}; + +// Specialization for multi-threaded execution. +template +struct ScanLauncher { + void operator()(Self& self, typename Self::CoeffReturnType* data) { + using Scalar = typename Self::CoeffReturnType; + using Packet = typename Self::PacketReturnType; + const int PacketSize = internal::unpacket_traits::size; + const Index total_size = internal::array_prod(self.dimensions()); + const Index inner_block_size = self.stride() * self.size(); + bool parallelize_by_outer_blocks = (total_size >= (self.stride() * inner_block_size)); + + if ((parallelize_by_outer_blocks && total_size <= 4096) || + (!parallelize_by_outer_blocks && self.stride() < PacketSize)) { + ScanLauncher launcher; + launcher(self, data); + return; + } + + if (parallelize_by_outer_blocks) { + // Parallelize over outer blocks. 
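+      // Each outer block covers one full scan axis (self.stride() * self.size()
+      // coefficients), so the blocks are independent and can be scanned by
+      // different threads without synchronization.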
+ const Index num_outer_blocks = total_size / inner_block_size; + self.device().parallelFor( + num_outer_blocks, + TensorOpCost(inner_block_size, inner_block_size, + 16 * PacketSize * inner_block_size, Vectorize, + PacketSize), + [=](Index blk_size) { + return AdjustBlockSize(inner_block_size * sizeof(Scalar), blk_size); + }, + [&](Index first, Index last) { + for (Index idx1 = first; idx1 < last; ++idx1) { + ReduceBlock block_reducer; + block_reducer(self, idx1 * inner_block_size, data); + } + }); + } else { + // Parallelize over inner packets/scalars dimensions when the reduction + // axis is not an inner dimension. + ReduceBlock block_reducer; + for (Index idx1 = 0; idx1 < total_size; + idx1 += self.stride() * self.size()) { + block_reducer(self, idx1, data); + } + } + } +}; +#endif // EIGEN_USE_THREADS + +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + +// GPU implementation of scan +// TODO(ibab) This placeholder implementation performs multiple scans in +// parallel, but it would be better to use a parallel scan algorithm and +// optimize memory access. +template +__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) { + // Compute offset as in the CPU version + Index val = threadIdx.x + blockIdx.x * blockDim.x; + Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride(); + + if (offset + (self.size() - 1) * self.stride() < total_size) { + // Compute the scan along the axis, starting at the calculated offset + typename Self::CoeffReturnType accum = self.accumulator().initialize(); + for (Index idx = 0; idx < self.size(); idx++) { + Index curr = offset + idx * self.stride(); + if (self.exclusive()) { + data[curr] = self.accumulator().finalize(accum); + self.accumulator().reduce(self.inner().coeff(curr), &accum); + } else { + self.accumulator().reduce(self.inner().coeff(curr), &accum); + data[curr] = self.accumulator().finalize(accum); + } + } + } + __syncthreads(); + +} + +template +struct ScanLauncher { + void operator()(const Self& self, typename Self::CoeffReturnType* data) { + Index total_size = internal::array_prod(self.dimensions()); + Index num_blocks = (total_size / self.size() + 63) / 64; + Index block_size = 64; + + LAUNCH_GPU_KERNEL((ScanKernel), num_blocks, block_size, 0, self.device(), self, total_size, data); + } +}; +#endif // EIGEN_USE_GPU && (EIGEN_GPUCC) + +} // namespace internal + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + + typedef TensorScanOp XprType; + typedef typename XprType::Index Index; + typedef const ArgType ChildTypeNoConst; + typedef const ArgType ChildType; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef TensorEvaluator, Device> Self; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = false, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = true + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const 
XprType& op, const Device& device) + : m_impl(op.expression(), device), + m_device(device), + m_exclusive(op.exclusive()), + m_accumulator(op.accumulator()), + m_size(m_impl.dimensions()[op.axis()]), + m_stride(1), m_consume_dim(op.axis()), + m_output(NULL) { + + // Accumulating a scalar isn't supported. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(op.axis() >= 0 && op.axis() < NumDims); + + // Compute stride of scan axis + const Dimensions& dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < op.axis(); ++i) { + m_stride = m_stride * dims[i]; + } + } else { + // dims can only be indexed through unsigned integers, + // so let's use an unsigned type to let the compiler knows. + // This prevents stupid warnings: ""'*((void*)(& evaluator)+64)[18446744073709551615]' may be used uninitialized in this function" + unsigned int axis = internal::convert_index(op.axis()); + for (unsigned int i = NumDims - 1; i > axis; --i) { + m_stride = m_stride * dims[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const { + return m_stride; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const { + return m_consume_dim; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const { + return m_size; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const { + return m_accumulator; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const { + return m_exclusive; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator& inner() const { + return m_impl; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { + return m_device; + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { + m_impl.evalSubExprsIfNeeded(NULL); + internal::ScanLauncher launcher; + if (data) { + launcher(*this, data); + return false; + } + + const Index total_size = internal::array_prod(dimensions()); + m_output = static_cast(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar)))); + launcher(*this, m_output); + return true; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_output + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const + { + return m_output; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_output[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + + EIGEN_STRONG_INLINE void cleanup() { + if (m_output) { + m_device.deallocate_temp(m_output); + m_output = NULL; + } + m_impl.cleanup(); + } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + m_output.bind(cgh); + } +#endif +protected: + TensorEvaluator m_impl; + const Device EIGEN_DEVICE_REF m_device; + const bool m_exclusive; + Op m_accumulator; + const Index m_size; + Index m_stride; + Index m_consume_dim; + EvaluatorPointerType m_output; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h 
b/external/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h
new file mode 100644
index 0000000..7f68ecb
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h
@@ -0,0 +1,513 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact:
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorScanSycl.h
+ *
+ * \brief:
+ *  TensorScanSycl implements an extended version of
+ *  "Efficient parallel scan algorithms for GPUs" for Tensor operations.
+ *  The algorithm requires up to 3 stages (and consequently 3 kernels) depending
+ *  on the size of the tensor. In the first kernel (ScanKernelFunctor), each
+ *  thread within the work-group individually reduces the elements allocated to
+ *  it, in order to reduce the total number of blocks. In the next step, all
+ *  threads within the work-group reduce the associated blocks into the
+ *  temporary buffers. In the next kernel (ScanBlockKernelFunctor), the temporary
+ *  buffer is given as an input and all the threads within a work-group scan and
+ *  reduce the boundaries between the blocks (generated by the previous
+ *  kernel) and write the data to the temporary buffer. If the second kernel is
+ *  required, the third and final kernel (ScanAdjustmentKernelFunctor)
+ *  adjusts the final result in the output buffer.
+ *  The original algorithm for the parallel prefix sum can be found here:
+ *
+ * Sengupta, Shubhabrata, Mark Harris, and Michael Garland. "Efficient parallel
+ * scan algorithms for GPUs." NVIDIA, Santa Clara, CA, Tech. Rep. NVR-2008-003
+ *1, no. 1 (2008): 1-17.
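+ *
+ * For reference, this scan path is normally reached through the cumulative
+ * ops on TensorBase rather than by instantiating the kernels directly, e.g.
+ * (illustrative):
+ *
+ *   Eigen::Tensor<float, 2> t(64, 128);
+ *   t.setRandom();
+ *   Eigen::Tensor<float, 2> s = t.cumsum(1);  // inclusive prefix sum along axis 1
+ *
+ * When such an expression is evaluated on an Eigen::SyclDevice, the
+ * ScanLauncher specialization at the bottom of this file dispatches to the
+ * kernels described above.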
+ *****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { + +#ifndef EIGEN_SYCL_MAX_GLOBAL_RANGE +#define EIGEN_SYCL_MAX_GLOBAL_RANGE (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 * 4) +#endif + +template +struct ScanParameters { + // must be power of 2 + static EIGEN_CONSTEXPR index_t ScanPerThread = 8; + const index_t total_size; + const index_t non_scan_size; + const index_t scan_size; + const index_t non_scan_stride; + const index_t scan_stride; + const index_t panel_threads; + const index_t group_threads; + const index_t block_threads; + const index_t elements_per_group; + const index_t elements_per_block; + const index_t loop_range; + + ScanParameters(index_t total_size_, index_t non_scan_size_, index_t scan_size_, index_t non_scan_stride_, + index_t scan_stride_, index_t panel_threads_, index_t group_threads_, index_t block_threads_, + index_t elements_per_group_, index_t elements_per_block_, index_t loop_range_) + : total_size(total_size_), + non_scan_size(non_scan_size_), + scan_size(scan_size_), + non_scan_stride(non_scan_stride_), + scan_stride(scan_stride_), + panel_threads(panel_threads_), + group_threads(group_threads_), + block_threads(block_threads_), + elements_per_group(elements_per_group_), + elements_per_block(elements_per_block_), + loop_range(loop_range_) {} +}; + +enum class scan_step { first, second }; +template +struct ScanKernelFunctor { + typedef cl::sycl::accessor + LocalAccessor; + static EIGEN_CONSTEXPR int PacketSize = ScanParameters::ScanPerThread / 2; + + LocalAccessor scratch; + Evaluator dev_eval; + OutAccessor out_accessor; + OutAccessor temp_accessor; + const ScanParameters scanParameters; + Op accumulator; + const bool inclusive; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScanKernelFunctor(LocalAccessor scratch_, const Evaluator dev_eval_, + OutAccessor out_accessor_, OutAccessor temp_accessor_, + const ScanParameters scanParameters_, Op accumulator_, + const bool inclusive_) + : scratch(scratch_), + dev_eval(dev_eval_), + out_accessor(out_accessor_), + temp_accessor(temp_accessor_), + scanParameters(scanParameters_), + accumulator(accumulator_), + inclusive(inclusive_) {} + + template + typename ::Eigen::internal::enable_if::type EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE + read(const Input &inpt, Index global_id) { + return inpt.coeff(global_id); + } + + template + typename ::Eigen::internal::enable_if::type EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE + read(const Input &inpt, Index global_id) { + return inpt[global_id]; + } + + template + typename ::Eigen::internal::enable_if::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + first_step_inclusive_Operation(InclusiveOp inclusive_op) { + inclusive_op(); + } + + template + typename ::Eigen::internal::enable_if::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + first_step_inclusive_Operation(InclusiveOp) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + auto out_ptr = out_accessor.get_pointer(); + auto tmp_ptr = temp_accessor.get_pointer(); + auto scratch_ptr = scratch.get_pointer().get(); + + for (Index loop_offset = 0; loop_offset < scanParameters.loop_range; loop_offset++) { + Index data_offset = (itemID.get_global_id(0) + (itemID.get_global_range(0) * loop_offset)); + Index tmp = data_offset % scanParameters.panel_threads; + const Index panel_id = 
data_offset / scanParameters.panel_threads; + const Index group_id = tmp / scanParameters.group_threads; + tmp = tmp % scanParameters.group_threads; + const Index block_id = tmp / scanParameters.block_threads; + const Index local_id = tmp % scanParameters.block_threads; + // we put one element per packet in scratch_mem + const Index scratch_stride = scanParameters.elements_per_block / PacketSize; + const Index scratch_offset = (itemID.get_local_id(0) / scanParameters.block_threads) * scratch_stride; + CoeffReturnType private_scan[ScanParameters::ScanPerThread]; + CoeffReturnType inclusive_scan; + // the actual panel size is scan_size * non_scan_size. + // elements_per_panel is roundup to power of 2 for binary tree + const Index panel_offset = panel_id * scanParameters.scan_size * scanParameters.non_scan_size; + const Index group_offset = group_id * scanParameters.non_scan_stride; + // This will be effective when the size is bigger than elements_per_block + const Index block_offset = block_id * scanParameters.elements_per_block * scanParameters.scan_stride; + const Index thread_offset = (ScanParameters::ScanPerThread * local_id * scanParameters.scan_stride); + const Index global_offset = panel_offset + group_offset + block_offset + thread_offset; + Index next_elements = 0; + EIGEN_UNROLL_LOOP + for (int i = 0; i < ScanParameters::ScanPerThread; i++) { + Index global_id = global_offset + next_elements; + private_scan[i] = ((((block_id * scanParameters.elements_per_block) + + (ScanParameters::ScanPerThread * local_id) + i) < scanParameters.scan_size) && + (global_id < scanParameters.total_size)) + ? read(dev_eval, global_id) + : accumulator.initialize(); + next_elements += scanParameters.scan_stride; + } + first_step_inclusive_Operation([&]() EIGEN_DEVICE_FUNC { + if (inclusive) { + inclusive_scan = private_scan[ScanParameters::ScanPerThread - 1]; + } + }); + // This for loop must be 2 + EIGEN_UNROLL_LOOP + for (int packetIndex = 0; packetIndex < ScanParameters::ScanPerThread; packetIndex += PacketSize) { + Index private_offset = 1; + // build sum in place up the tree + EIGEN_UNROLL_LOOP + for (Index d = PacketSize >> 1; d > 0; d >>= 1) { + EIGEN_UNROLL_LOOP + for (Index l = 0; l < d; l++) { + Index ai = private_offset * (2 * l + 1) - 1 + packetIndex; + Index bi = private_offset * (2 * l + 2) - 1 + packetIndex; + CoeffReturnType accum = accumulator.initialize(); + accumulator.reduce(private_scan[ai], &accum); + accumulator.reduce(private_scan[bi], &accum); + private_scan[bi] = accumulator.finalize(accum); + } + private_offset *= 2; + } + scratch_ptr[2 * local_id + (packetIndex / PacketSize) + scratch_offset] = + private_scan[PacketSize - 1 + packetIndex]; + private_scan[PacketSize - 1 + packetIndex] = accumulator.initialize(); + // traverse down tree & build scan + EIGEN_UNROLL_LOOP + for (Index d = 1; d < PacketSize; d *= 2) { + private_offset >>= 1; + EIGEN_UNROLL_LOOP + for (Index l = 0; l < d; l++) { + Index ai = private_offset * (2 * l + 1) - 1 + packetIndex; + Index bi = private_offset * (2 * l + 2) - 1 + packetIndex; + CoeffReturnType accum = accumulator.initialize(); + accumulator.reduce(private_scan[ai], &accum); + accumulator.reduce(private_scan[bi], &accum); + private_scan[ai] = private_scan[bi]; + private_scan[bi] = accumulator.finalize(accum); + } + } + } + + Index offset = 1; + // build sum in place up the tree + for (Index d = scratch_stride >> 1; d > 0; d >>= 1) { + // Synchronise + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (local_id < d) { + Index ai = 
offset * (2 * local_id + 1) - 1 + scratch_offset; + Index bi = offset * (2 * local_id + 2) - 1 + scratch_offset; + CoeffReturnType accum = accumulator.initialize(); + accumulator.reduce(scratch_ptr[ai], &accum); + accumulator.reduce(scratch_ptr[bi], &accum); + scratch_ptr[bi] = accumulator.finalize(accum); + } + offset *= 2; + } + // Synchronise + itemID.barrier(cl::sycl::access::fence_space::local_space); + // next step optimisation + if (local_id == 0) { + if (((scanParameters.elements_per_group / scanParameters.elements_per_block) > 1)) { + const Index temp_id = panel_id * (scanParameters.elements_per_group / scanParameters.elements_per_block) * + scanParameters.non_scan_size + + group_id * (scanParameters.elements_per_group / scanParameters.elements_per_block) + + block_id; + tmp_ptr[temp_id] = scratch_ptr[scratch_stride - 1 + scratch_offset]; + } + // clear the last element + scratch_ptr[scratch_stride - 1 + scratch_offset] = accumulator.initialize(); + } + // traverse down tree & build scan + for (Index d = 1; d < scratch_stride; d *= 2) { + offset >>= 1; + // Synchronise + itemID.barrier(cl::sycl::access::fence_space::local_space); + if (local_id < d) { + Index ai = offset * (2 * local_id + 1) - 1 + scratch_offset; + Index bi = offset * (2 * local_id + 2) - 1 + scratch_offset; + CoeffReturnType accum = accumulator.initialize(); + accumulator.reduce(scratch_ptr[ai], &accum); + accumulator.reduce(scratch_ptr[bi], &accum); + scratch_ptr[ai] = scratch_ptr[bi]; + scratch_ptr[bi] = accumulator.finalize(accum); + } + } + // Synchronise + itemID.barrier(cl::sycl::access::fence_space::local_space); + // This for loop must be 2 + EIGEN_UNROLL_LOOP + for (int packetIndex = 0; packetIndex < ScanParameters::ScanPerThread; packetIndex += PacketSize) { + EIGEN_UNROLL_LOOP + for (Index i = 0; i < PacketSize; i++) { + CoeffReturnType accum = private_scan[packetIndex + i]; + accumulator.reduce(scratch_ptr[2 * local_id + (packetIndex / PacketSize) + scratch_offset], &accum); + private_scan[packetIndex + i] = accumulator.finalize(accum); + } + } + first_step_inclusive_Operation([&]() EIGEN_DEVICE_FUNC { + if (inclusive) { + accumulator.reduce(private_scan[ScanParameters::ScanPerThread - 1], &inclusive_scan); + private_scan[0] = accumulator.finalize(inclusive_scan); + } + }); + next_elements = 0; + // right the first set of private param + EIGEN_UNROLL_LOOP + for (Index i = 0; i < ScanParameters::ScanPerThread; i++) { + Index global_id = global_offset + next_elements; + if ((((block_id * scanParameters.elements_per_block) + (ScanParameters::ScanPerThread * local_id) + i) < + scanParameters.scan_size) && + (global_id < scanParameters.total_size)) { + Index private_id = (i * !inclusive) + (((i + 1) % ScanParameters::ScanPerThread) * (inclusive)); + out_ptr[global_id] = private_scan[private_id]; + } + next_elements += scanParameters.scan_stride; + } + } // end for loop + } +}; + +template +struct ScanAdjustmentKernelFunctor { + typedef cl::sycl::accessor + LocalAccessor; + static EIGEN_CONSTEXPR int PacketSize = ScanParameters::ScanPerThread / 2; + InAccessor in_accessor; + OutAccessor out_accessor; + const ScanParameters scanParameters; + Op accumulator; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScanAdjustmentKernelFunctor(LocalAccessor, InAccessor in_accessor_, + OutAccessor out_accessor_, + const ScanParameters scanParameters_, + Op accumulator_) + : in_accessor(in_accessor_), + out_accessor(out_accessor_), + scanParameters(scanParameters_), + accumulator(accumulator_) {} + + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) { + auto in_ptr = in_accessor.get_pointer(); + auto out_ptr = out_accessor.get_pointer(); + + for (Index loop_offset = 0; loop_offset < scanParameters.loop_range; loop_offset++) { + Index data_offset = (itemID.get_global_id(0) + (itemID.get_global_range(0) * loop_offset)); + Index tmp = data_offset % scanParameters.panel_threads; + const Index panel_id = data_offset / scanParameters.panel_threads; + const Index group_id = tmp / scanParameters.group_threads; + tmp = tmp % scanParameters.group_threads; + const Index block_id = tmp / scanParameters.block_threads; + const Index local_id = tmp % scanParameters.block_threads; + + // the actual panel size is scan_size * non_scan_size. + // elements_per_panel is roundup to power of 2 for binary tree + const Index panel_offset = panel_id * scanParameters.scan_size * scanParameters.non_scan_size; + const Index group_offset = group_id * scanParameters.non_scan_stride; + // This will be effective when the size is bigger than elements_per_block + const Index block_offset = block_id * scanParameters.elements_per_block * scanParameters.scan_stride; + const Index thread_offset = ScanParameters::ScanPerThread * local_id * scanParameters.scan_stride; + + const Index global_offset = panel_offset + group_offset + block_offset + thread_offset; + const Index block_size = scanParameters.elements_per_group / scanParameters.elements_per_block; + const Index in_id = (panel_id * block_size * scanParameters.non_scan_size) + (group_id * block_size) + block_id; + CoeffReturnType adjust_val = in_ptr[in_id]; + + Index next_elements = 0; + EIGEN_UNROLL_LOOP + for (Index i = 0; i < ScanParameters::ScanPerThread; i++) { + Index global_id = global_offset + next_elements; + if ((((block_id * scanParameters.elements_per_block) + (ScanParameters::ScanPerThread * local_id) + i) < + scanParameters.scan_size) && + (global_id < scanParameters.total_size)) { + CoeffReturnType accum = adjust_val; + accumulator.reduce(out_ptr[global_id], &accum); + out_ptr[global_id] = accumulator.finalize(accum); + } + next_elements += scanParameters.scan_stride; + } + } + } +}; + +template +struct ScanInfo { + const Index &total_size; + const Index &scan_size; + const Index &panel_size; + const Index &non_scan_size; + const Index &scan_stride; + const Index &non_scan_stride; + + Index max_elements_per_block; + Index block_size; + Index panel_threads; + Index group_threads; + Index block_threads; + Index elements_per_group; + Index elements_per_block; + Index loop_range; + Index global_range; + Index local_range; + const Eigen::SyclDevice &dev; + EIGEN_STRONG_INLINE ScanInfo(const Index &total_size_, const Index &scan_size_, const Index &panel_size_, + const Index &non_scan_size_, const Index &scan_stride_, const Index &non_scan_stride_, + const Eigen::SyclDevice &dev_) + : total_size(total_size_), + scan_size(scan_size_), + panel_size(panel_size_), + non_scan_size(non_scan_size_), + scan_stride(scan_stride_), + non_scan_stride(non_scan_stride_), + dev(dev_) { + // must be power of 2 + local_range = std::min(Index(dev.getNearestPowerOfTwoWorkGroupSize()), + Index(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1)); + + max_elements_per_block = local_range * ScanParameters::ScanPerThread; + + elements_per_group = + dev.getPowerOfTwo(Index(roundUp(Index(scan_size), ScanParameters::ScanPerThread)), true); + const Index elements_per_panel = elements_per_group * non_scan_size; + elements_per_block = 
std::min(Index(elements_per_group), Index(max_elements_per_block)); + panel_threads = elements_per_panel / ScanParameters::ScanPerThread; + group_threads = elements_per_group / ScanParameters::ScanPerThread; + block_threads = elements_per_block / ScanParameters::ScanPerThread; + block_size = elements_per_group / elements_per_block; +#ifdef EIGEN_SYCL_MAX_GLOBAL_RANGE + const Index max_threads = std::min(Index(panel_threads * panel_size), Index(EIGEN_SYCL_MAX_GLOBAL_RANGE)); +#else + const Index max_threads = panel_threads * panel_size; +#endif + global_range = roundUp(max_threads, local_range); + loop_range = Index( + std::ceil(double(elements_per_panel * panel_size) / (global_range * ScanParameters::ScanPerThread))); + } + inline ScanParameters get_scan_parameter() { + return ScanParameters(total_size, non_scan_size, scan_size, non_scan_stride, scan_stride, panel_threads, + group_threads, block_threads, elements_per_group, elements_per_block, loop_range); + } + inline cl::sycl::nd_range<1> get_thread_range() { + return cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range)); + } +}; + +template +struct SYCLAdjustBlockOffset { + EIGEN_STRONG_INLINE static void adjust_scan_block_offset(EvaluatorPointerType in_ptr, EvaluatorPointerType out_ptr, + Reducer &accumulator, const Index total_size, + const Index scan_size, const Index panel_size, + const Index non_scan_size, const Index scan_stride, + const Index non_scan_stride, const Eigen::SyclDevice &dev) { + auto scan_info = + ScanInfo(total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, dev); + + typedef ScanAdjustmentKernelFunctor + AdjustFuctor; + dev.template unary_kernel_launcher(in_ptr, out_ptr, scan_info.get_thread_range(), + scan_info.max_elements_per_block, + scan_info.get_scan_parameter(), accumulator); + } +}; + +template +struct ScanLauncher_impl { + template + EIGEN_STRONG_INLINE static void scan_block(Input in_ptr, EvaluatorPointerType out_ptr, Reducer &accumulator, + const Index total_size, const Index scan_size, const Index panel_size, + const Index non_scan_size, const Index scan_stride, + const Index non_scan_stride, const bool inclusive, + const Eigen::SyclDevice &dev) { + auto scan_info = + ScanInfo(total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, dev); + const Index temp_pointer_size = scan_info.block_size * non_scan_size * panel_size; + const Index scratch_size = scan_info.max_elements_per_block / (ScanParameters::ScanPerThread / 2); + CoeffReturnType *temp_pointer = + static_cast(dev.allocate_temp(temp_pointer_size * sizeof(CoeffReturnType))); + EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer); + + typedef ScanKernelFunctor ScanFunctor; + dev.template binary_kernel_launcher( + in_ptr, out_ptr, tmp_global_accessor, scan_info.get_thread_range(), scratch_size, + scan_info.get_scan_parameter(), accumulator, inclusive); + + if (scan_info.block_size > 1) { + ScanLauncher_impl::scan_block( + tmp_global_accessor, tmp_global_accessor, accumulator, temp_pointer_size, scan_info.block_size, panel_size, + non_scan_size, Index(1), scan_info.block_size, false, dev); + + SYCLAdjustBlockOffset::adjust_scan_block_offset( + tmp_global_accessor, out_ptr, accumulator, total_size, scan_size, panel_size, non_scan_size, scan_stride, + non_scan_stride, dev); + } + dev.deallocate_temp(temp_pointer); + } +}; + +} // namespace internal +} // namespace TensorSycl +namespace internal { +template +struct ScanLauncher { + typedef typename 
Self::Index Index; + typedef typename Self::CoeffReturnType CoeffReturnType; + typedef typename Self::Storage Storage; + typedef typename Self::EvaluatorPointerType EvaluatorPointerType; + void operator()(Self &self, EvaluatorPointerType data) { + const Index total_size = internal::array_prod(self.dimensions()); + const Index scan_size = self.size(); + const Index scan_stride = self.stride(); + // this is the scan op (can be sum or ...) + auto accumulator = self.accumulator(); + auto inclusive = !self.exclusive(); + auto consume_dim = self.consume_dim(); + auto dev = self.device(); + + auto dims = self.inner().dimensions(); + + Index non_scan_size = 1; + Index panel_size = 1; + if (static_cast(Self::Layout) == static_cast(ColMajor)) { + for (int i = 0; i < consume_dim; i++) { + non_scan_size *= dims[i]; + } + for (int i = consume_dim + 1; i < Self::NumDims; i++) { + panel_size *= dims[i]; + } + } else { + for (int i = Self::NumDims - 1; i > consume_dim; i--) { + non_scan_size *= dims[i]; + } + for (int i = consume_dim - 1; i >= 0; i--) { + panel_size *= dims[i]; + } + } + const Index non_scan_stride = (scan_stride > 1) ? 1 : scan_size; + auto eval_impl = self.inner(); + TensorSycl::internal::ScanLauncher_impl::scan_block( + eval_impl, data, accumulator, total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, + inclusive, dev); + } +}; +} // namespace internal +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h new file mode 100644 index 0000000..e5e5efd --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -0,0 +1,471 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H +#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H + +namespace Eigen { + +/** \class TensorShuffling + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor shuffling class. 
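+ *
+ * Illustrative example (assuming the usual TensorBase::shuffle() front-end
+ * that builds this op):
+ * \code
+ * Eigen::Tensor<float, 3> input(20, 30, 50);
+ * input.setRandom();
+ * Eigen::array<int, 3> perm{{2, 0, 1}};
+ * Eigen::Tensor<float, 3> output = input.shuffle(perm);
+ * // output has dimensions (50, 20, 30): output dimension i is input dimension perm[i].
+ * \endcode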
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorShufflingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorShufflingOp type; +}; + +} // end namespace internal + + + +template +class TensorShufflingOp : public TensorBase > +{ + public: + typedef TensorBase > Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl) + : m_xpr(expr), m_shuffle(shfl) {} + + EIGEN_DEVICE_FUNC + const Shuffle& shufflePermutation() const { return m_shuffle; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp) + + + protected: + typename XprType::Nested m_xpr; + const Shuffle m_shuffle; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Self; + typedef TensorShufflingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = TensorEvaluator::RawAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename internal::TensorMaterializedBlock + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_device(device), + m_impl(op.expression(), device) + { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const Shuffle& shuffle = op.shufflePermutation(); + m_is_identity = true; + for (int i = 0; i < NumDims; ++i) { + m_shuffle[i] = static_cast(shuffle[i]); + m_dimensions[i] = input_dims[shuffle[i]]; + m_inverseShuffle[shuffle[i]] = i; + if (m_is_identity && shuffle[i] != i) { + m_is_identity = false; + } + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_unshuffledInputStrides[0] = 1; + 
m_outputStrides[0] = 1; + + for (int i = 1; i < NumDims; ++i) { + m_unshuffledInputStrides[i] = + m_unshuffledInputStrides[i - 1] * input_dims[i - 1]; + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor( + m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1)); + } + } else { + m_unshuffledInputStrides[NumDims - 1] = 1; + m_outputStrides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_unshuffledInputStrides[i] = + m_unshuffledInputStrides[i + 1] * input_dims[i + 1]; + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor( + m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1)); + } + } + + for (int i = 0; i < NumDims; ++i) { + m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); + } +#endif // EIGEN_USE_THREADS + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if (m_is_identity) { + return m_impl.coeff(index); + } else { + return m_impl.coeff(srcCoeff(index)); + } + } + + template + struct PacketLoader { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static PacketReturnType Run(const Self& self, Index index) { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = self.coeff(index + i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + }; + + template + struct PacketLoader { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static PacketReturnType Run(const Self& self, Index index) { + if (self.m_is_identity) { + return self.m_impl.template packet(index); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = self.coeff(index + i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + }; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + return PacketLoader::PacketAccess>::Run(*this, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + static const int inner_dim = + Layout == static_cast(ColMajor) ? 0 : NumDims - 1; + + const size_t target_size = m_device.firstLevelCacheSize(); + const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim; + + // Shuffled inner dimensions leads to a random memory access, which is not + // captured by default cost model bytes loaded/stored. We add this cost + // explicitly. The number of cycles picked based on the benchmarks. + // TODO(ezhulenev): This number was picked based on a very questionable + // benchmarks, add benchmarks that are representative of real workloads. 
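+    // With a shuffled inner dimension we request uniform (square-ish) blocks and
+    // charge the extra per-coefficient cycles below; otherwise skewed blocks that
+    // keep the inner dimension contiguous are preferred.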
+ using BlockRequirements = internal::TensorBlockResourceRequirements; + if (inner_dim_shuffled) { + return BlockRequirements::uniform(target_size) + .addCostPerCoeff({0, 0, NumDims * 28}); + } else { + return BlockRequirements::skewed(target_size); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool root_of_expr_ast = false) const { + assert(m_impl.data() != NULL); + + typedef internal::TensorBlockIO + TensorBlockIO; + typedef typename TensorBlockIO::Dst TensorBlockIODst; + typedef typename TensorBlockIO::Src TensorBlockIOSrc; + + const typename TensorBlock::Storage block_storage = + TensorBlock::prepareStorage( + desc, scratch, /*allow_strided_storage=*/root_of_expr_ast); + + typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides); + TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset())); + + TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(), + block_storage.data()); + + typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle); + TensorBlockIO::Copy(dst, src, dst_to_src_dim_map); + + return block_storage.AsTensorMaterializedBlock(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = m_is_identity ? TensorOpCost::AddCost() : + NumDims * (2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize); + } + + EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex( + Index input_index, + const DSizes& input_block_strides, + const DSizes& output_block_strides, + const DSizes, NumDims>& fast_input_block_strides) const { + Index output_index = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = input_index / fast_input_block_strides[i]; + output_index += idx * output_block_strides[m_inverseShuffle[i]]; + input_index -= idx * input_block_strides[i]; + } + return output_index + input_index * + output_block_strides[m_inverseShuffle[0]]; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = input_index / fast_input_block_strides[i]; + output_index += idx * output_block_strides[m_inverseShuffle[i]]; + input_index -= idx * input_block_strides[i]; + } + return output_index + input_index * + output_block_strides[m_inverseShuffle[NumDims - 1]]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + return inputIndex + index * m_inputStrides[0]; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + return inputIndex + index * m_inputStrides[NumDims - 1]; + } + } + + Dimensions m_dimensions; + bool m_is_identity; + array m_shuffle; + 
array m_inverseShuffle; // TODO(ezhulenev): Make it int type. + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + array m_unshuffledInputStrides; + + const Device EIGEN_DEVICE_REF m_device; + TensorEvaluator m_impl; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + + typedef TensorShufflingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + + enum { + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = TensorEvaluator::RawAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + typedef typename internal::remove_const::type ScalarNoConst; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlockDesc& desc, const TensorBlock& block) { + eigen_assert(this->m_impl.data() != NULL); + + typedef internal::TensorBlockIO + TensorBlockIO; + typedef typename TensorBlockIO::Dst TensorBlockIODst; + typedef typename TensorBlockIO::Src TensorBlockIOSrc; + + const Scalar* block_buffer = block.data(); + + // TODO(ezhulenev): TensorBlockIO should be able to read from any Eigen + // expression with coefficient and packet access as `src`. + void* mem = NULL; + if (block_buffer == NULL) { + mem = this->m_device.allocate(desc.size() * sizeof(Scalar)); + ScalarNoConst* buf = static_cast(mem); + + typedef internal::TensorBlockAssignment< + ScalarNoConst, NumDims, typename TensorBlock::XprType, Index> + TensorBlockAssignment; + + TensorBlockAssignment::Run( + TensorBlockAssignment::target( + desc.dimensions(), internal::strides(desc.dimensions()), + buf), + block.expr()); + + block_buffer = buf; + } + + // Read from block. + TensorBlockIOSrc src(internal::strides(desc.dimensions()), + block_buffer); + + // Write to the output buffer. + typename TensorBlockIO::Dimensions output_strides( + this->m_unshuffledInputStrides); + typename TensorBlockIO::Dimensions output_dimensions; + for (int i = 0; i < NumDims; ++i) { + output_dimensions[this->m_shuffle[i]] = desc.dimension(i); + } + TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(), + this->srcCoeff(desc.offset())); + + // Reorder dimensions according to the shuffle. 
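+    // The dimension map is the inverse of the shuffle: entry i names the block
+    // (output-order) dimension that maps onto dimension i of the unshuffled
+    // input buffer.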
+ typename TensorBlockIO::DimensionsMap dst_to_src_dim_map; + for (int i = 0; i < NumDims; ++i) { + dst_to_src_dim_map[i] = static_cast(this->m_inverseShuffle[i]); + } + TensorBlockIO::Copy(dst, src, dst_to_src_dim_map); + + // Deallocate temporary buffer used for the block materialization. + if (mem != NULL) this->m_device.deallocate(mem); + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h new file mode 100644 index 0000000..5ff0880 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -0,0 +1,161 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// Copyright (C) 2014-2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H +#define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H + +#ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN; +#else + #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN +#endif + +namespace Eigen { + +/** \internal + * + * \class TensorStorage + * \ingroup CXX11_Tensor_Module + * + * \brief Stores the data of a tensor + * + * This class stores the data of fixed-size, dynamic-size or mixed tensors + * in a way as compact as possible. + * + * \sa Tensor + */ +template class TensorStorage; + + +// Pure fixed-size storage +template +class TensorStorage +{ + private: + static const std::size_t Size = FixedDimensions::total_size; + + // Allocate an array of size at least one to prevent compiler warnings. + static const std::size_t MinSize = max_n_1::size; + EIGEN_ALIGN_MAX T m_data[MinSize]; + + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStorage() { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T *data() { return m_data; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T *data() const { return m_data; } + + static EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const FixedDimensions& dimensions() + { + static const FixedDimensions* singleton_dimensions = new FixedDimensions(); + return *singleton_dimensions; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE DenseIndex size() const { return Size; } +}; + +// pure dynamic +template +class TensorStorage, Options_> +{ + public: + typedef IndexType Index; + typedef DSizes Dimensions; + typedef TensorStorage, Options_> Self; + + EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() { + if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + } + EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) + : m_data(0), m_dimensions(internal::template repeat(0)) {} + EIGEN_DEVICE_FUNC TensorStorage(Index size, const array& dimensions) + : m_data(internal::conditional_aligned_new_auto(size)), m_dimensions(dimensions) + { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) 
{ + m_data = internal::conditional_aligned_new_auto(internal::array_prod(m_dimensions)); + } +#endif + + EIGEN_DEVICE_FUNC TensorStorage(const Self& other) + : m_data(internal::conditional_aligned_new_auto(internal::array_prod(other.m_dimensions))) + , m_dimensions(other.m_dimensions) + { + internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data); + } + EIGEN_DEVICE_FUNC Self& operator=(const Self& other) + { + if (this != &other) { + Self tmp(other); + this->swap(tmp); + } + return *this; + } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC TensorStorage(Self&& other) : TensorStorage() + { + *this = std::move(other); + } + + EIGEN_DEVICE_FUNC Self& operator=(Self&& other) + { + numext::swap(m_data, other.m_data); + numext::swap(m_dimensions, other.m_dimensions); + return *this; + } +#endif + + EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto(m_data, internal::array_prod(m_dimensions)); } + EIGEN_DEVICE_FUNC void swap(Self& other) + { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;} + + EIGEN_DEVICE_FUNC void resize(Index size, const array& nbDimensions) + { + const Index currentSz = internal::array_prod(m_dimensions); + if(size != currentSz) + { + internal::conditional_aligned_delete_auto(m_data, currentSz); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_dimensions = nbDimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } + + private: + T *m_data; + Dimensions m_dimensions; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h new file mode 100644 index 0000000..2f62a66 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -0,0 +1,346 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H +#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H + +namespace Eigen { + +/** \class TensorStriding + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor striding class. 
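+  *
+  * Keeps every stride-th coefficient along each dimension: the output extent
+  * along dimension i is ceil(input_dim[i] / stride[i]), matching the evaluator
+  * constructor below. A minimal illustrative use, assuming the usual
+  * TensorBase::stride() entry point:
+  *   Eigen::Tensor<float, 2> t(8, 8);
+  *   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 2}};
+  *   auto every_other = t.stride(strides); // 4x4 expression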
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorStridingOpEIGEN_DEVICE_REF type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorStridingOp type; +}; + +} // end namespace internal + + + +template +class TensorStridingOp : public TensorBase > +{ + public: + typedef TensorBase > Base; + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims) + : m_xpr(expr), m_dims(dims) {} + + EIGEN_DEVICE_FUNC + const Strides& strides() const { return m_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp) + + protected: + typename XprType::Nested m_xpr; + const Strides m_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorStridingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + m_dimensions = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] =Eigen::numext::ceil(static_cast(m_dimensions[i]) / op.strides()[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_inputStrides[i-1] *= op.strides()[i-1]; + } + m_inputStrides[NumDims-1] *= op.strides()[NumDims-1]; + } else { // RowMajor + m_outputStrides[NumDims-1] = 1; + m_inputStrides[NumDims-1] = 1; + for 
(int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_inputStrides[i+1] *= op.strides()[i+1]; + } + m_inputStrides[0] *= op.strides()[0]; + } + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + PacketSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += idx0 * m_inputStrides[i]; + inputIndices[1] += idx1 * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += indices[0] * m_inputStrides[0]; + inputIndices[1] += indices[1] * m_inputStrides[0]; + } else { // RowMajor + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += idx0 * m_inputStrides[i]; + inputIndices[1] += idx1 * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += indices[0] * m_inputStrides[NumDims-1]; + inputIndices[1] += indices[1] * m_inputStrides[NumDims-1]; + } + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + EIGEN_UNROLL_LOOP + for (int i = 1; i < PacketSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost() + + TensorOpCost::MulCost() + + TensorOpCost::DivCost()) + + TensorOpCost::MulCost(); + if (vectorized) { + compute_cost *= 2; // packet() computes two indices + } + const int innerDim = (static_cast(Layout) == static_cast(ColMajor)) ? 0 : (NumDims - 1); + return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) + + // Computation is not vectorized per se, but it is done once per packet. 
+ TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + inputIndex += idx * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += index * m_inputStrides[0]; + } else { // RowMajor + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + inputIndex += idx * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += index * m_inputStrides[NumDims-1]; + } + return inputIndex; + } + + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; +}; + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorStridingOp XprType; + typedef TensorEvaluator Base; + // typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + // typedef DSizes Dimensions; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + PreferBlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize()); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + PacketSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + EIGEN_UNROLL_LOOP + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / this->m_outputStrides[i]; + const Index idx1 = indices[1] / this->m_outputStrides[i]; + inputIndices[0] += idx0 * this->m_inputStrides[i]; + inputIndices[1] += idx1 * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += indices[0] * this->m_inputStrides[0]; + inputIndices[1] += indices[1] * this->m_inputStrides[0]; + } else { // RowMajor + EIGEN_UNROLL_LOOP + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / this->m_outputStrides[i]; + const Index idx1 = indices[1] / this->m_outputStrides[i]; + inputIndices[0] += idx0 * this->m_inputStrides[i]; + inputIndices[1] += idx1 * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += 
indices[0] * this->m_inputStrides[NumDims-1]; + inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1]; + } + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { + this->m_impl.template writePacket(inputIndices[0], x); + } + else { + EIGEN_ALIGN_MAX Scalar values[PacketSize]; + internal::pstore(values, x); + this->m_impl.coeffRef(inputIndices[0]) = values[0]; + this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1]; + EIGEN_UNROLL_LOOP + for (int i = 1; i < PacketSize-1; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h new file mode 100644 index 0000000..926ecdd --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -0,0 +1,303 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gagan Goel +// Copyright (C) 2017 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRACE_H +#define EIGEN_CXX11_TENSOR_TENSOR_TRACE_H + +namespace Eigen { + +/** \class TensorTrace + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor Trace class. + * + * + */ + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorTraceOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorTraceOp type; +}; + +} // end namespace internal + + +template +class TensorTraceOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTraceOp(const XprType& expr, const Dims& dims) + : m_xpr(expr), m_dims(dims) { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dims& dims() const { return m_dims; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Dims m_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorTraceOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumReducedDims = internal::array_size::value; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; 
+ static const int PacketSize = internal::unpacket_traits::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_traceDim(1), m_device(device) + { + + EIGEN_STATIC_ASSERT((NumOutputDims >= 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumReducedDims >= 2) || ((NumReducedDims == 0) && (NumInputDims == 0)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + for (int i = 0; i < NumInputDims; ++i) { + m_reduced[i] = false; + } + + const Dims& op_dims = op.dims(); + for (int i = 0; i < NumReducedDims; ++i) { + eigen_assert(op_dims[i] >= 0); + eigen_assert(op_dims[i] < NumInputDims); + m_reduced[op_dims[i]] = true; + } + + // All the dimensions should be distinct to compute the trace + int num_distinct_reduce_dims = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (m_reduced[i]) { + ++num_distinct_reduce_dims; + } + } + + eigen_assert(num_distinct_reduce_dims == NumReducedDims); + + // Compute the dimensions of the result. + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + int output_index = 0; + int reduced_index = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (m_reduced[i]) { + m_reducedDims[reduced_index] = input_dims[i]; + if (reduced_index > 0) { + // All the trace dimensions must have the same size + eigen_assert(m_reducedDims[0] == m_reducedDims[reduced_index]); + } + ++reduced_index; + } + else { + m_dimensions[output_index] = input_dims[i]; + ++output_index; + } + } + + if (NumReducedDims != 0) { + m_traceDim = m_reducedDims[0]; + } + + // Compute the output strides + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } + } + else { + m_outputStrides.back() = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } + } + } + + // Compute the input strides + if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i - 1] * input_dims[i - 1]; + } + } + else { + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + output_index = 0; + reduced_index = 0; + for (int i = 0; i < NumInputDims; ++i) { + if(m_reduced[i]) { + m_reducedStrides[reduced_index] = input_strides[i]; + ++reduced_index; + } + else { + m_preservedStrides[output_index] = input_strides[i]; + ++output_index; + } + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void 
cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Initialize the result + CoeffReturnType result = internal::cast(0); + Index index_stride = 0; + for (int i = 0; i < NumReducedDims; ++i) { + index_stride += m_reducedStrides[i]; + } + + // If trace is requested along all dimensions, starting index would be 0 + Index cur_index = 0; + if (NumOutputDims != 0) + cur_index = firstInput(index); + for (Index i = 0; i < m_traceDim; ++i) { + result += m_impl.coeff(cur_index); + cur_index += index_stride; + } + + return result; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + PacketReturnType result = internal::ploadt(values); + return result; + } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + + protected: + // Given the output index, finds the first index in the input tensor used to compute the trace + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumOutputDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + startInput += index * m_preservedStrides[0]; + } + else { + for (int i = 0; i < NumOutputDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + startInput += index * m_preservedStrides[NumOutputDims - 1]; + } + return startInput; + } + + Dimensions m_dimensions; + TensorEvaluator m_impl; + // Initialize the size of the trace dimension + Index m_traceDim; + const Device EIGEN_DEVICE_REF m_device; + array m_reduced; + array m_reducedDims; + array m_outputStrides; + array m_reducedStrides; + array m_preservedStrides; +}; + + +} // End namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_TRACE_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h new file mode 100644 index 0000000..4f7fd34 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H +#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H + +namespace Eigen { +namespace internal { + + +template +class compute_tensor_flags +{ + enum { + is_dynamic_size_storage = 1, + + is_aligned = + ( + ((Options&DontAlign)==0) && ( +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 + (!is_dynamic_size_storage) +#else + 0 +#endif + | +#if EIGEN_MAX_ALIGN_BYTES>0 + is_dynamic_size_storage +#else + 0 +#endif + ) + ), + packet_access_bit = packet_traits::Vectorizable && is_aligned ? PacketAccessBit : 0 + }; + + public: + enum { ret = packet_access_bit }; +}; + + +template +struct traits > +{ + typedef Scalar_ Scalar; + typedef Dense StorageKind; + typedef IndexType_ Index; + static const int NumDimensions = NumIndices_; + static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; + enum { + Options = Options_, + Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit) + }; + template struct MakePointer { + typedef T* Type; + }; + typedef typename MakePointer::Type PointerType; +}; + + +template +struct traits > +{ + typedef Scalar_ Scalar; + typedef Dense StorageKind; + typedef IndexType_ Index; + static const int NumDimensions = array_size::value; + static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; + enum { + Options = Options_, + Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit) + }; + template struct MakePointer { + typedef T* Type; + }; + typedef typename MakePointer::Type PointerType; +}; + + +template class MakePointer_> +struct traits > + : public traits +{ + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags + }; + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. 
+ typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; + typedef typename MakePointer::Type PointerType; +}; + +template +struct traits > + : public traits +{ + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = BaseTraits::Options, + Flags = BaseTraits::Flags + }; + typedef typename BaseTraits::PointerType PointerType; +}; + + +template +struct eval, Eigen::Dense> +{ + typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorFixedSizeEIGEN_DEVICE_REF type; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorFixedSizeEIGEN_DEVICE_REF type; +}; + +template class MakePointer> +struct eval, Eigen::Dense> +{ + typedef const TensorMapEIGEN_DEVICE_REF type; +}; + +template class MakePointer> +struct eval, Eigen::Dense> +{ + typedef const TensorMapEIGEN_DEVICE_REF type; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorRefEIGEN_DEVICE_REF type; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorRefEIGEN_DEVICE_REF type; +}; + +// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector. +template struct nested +{ + typedef typename ref_selector::type type; +}; + +template +struct nested > +{ + typedef const TensorEIGEN_DEVICE_REF type; +}; + +template +struct nested > +{ + typedef const TensorEIGEN_DEVICE_REF type; +}; + +template +struct nested > +{ + typedef const TensorFixedSizeEIGEN_DEVICE_REF type; +}; + +template +struct nested > +{ + typedef const TensorFixedSizeEIGEN_DEVICE_REF type; +}; + + +template +struct nested > +{ + typedef const TensorRefEIGEN_DEVICE_REF type; +}; + +template +struct nested > +{ + typedef const TensorRefEIGEN_DEVICE_REF type; +}; + +} // end namespace internal + +// Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C, +// R, B), and convolve it with a set of filters, which can also be presented as +// a tensor (D, K, K, M), where M is the number of filters, K is the filter +// size, and each 3-dimensional tensor of size (D, K, K) is a filter. For +// simplicity we assume that we always use square filters (which is usually the +// case in images), hence the two Ks in the tensor dimension. It also takes in +// a few additional parameters: +// Stride (S): The convolution stride is the offset between locations where we +// apply the filters. A larger stride means that the output will be +// spatially smaller. +// Padding (P): The padding we apply to the input tensor along the R and C +// dimensions. This is usually used to make sure that the spatial +// dimensions of the output matches our intention. +// +// Two types of padding are often used: +// SAME: The pad value is computed so that the output will have size +// R/S and C/S. +// VALID: no padding is carried out. +// When we do padding, the padded values at the padded locations are usually +// zero. 
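+// For example, a 5x5 input convolved with a 3x3 filter at stride 1 stays
+// 5x5 under SAME padding (one row and one column of zeros on each side),
+// while VALID padding yields a 3x3 output with no padding at all.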
+// +// The output dimensions for convolution, when given all the parameters above, +// are as follows: +// When Padding = SAME: the output size is (B, R', C', M), where +// R' = ceil(float(R) / float(S)) +// C' = ceil(float(C) / float(S)) +// where ceil is the ceiling function. The input tensor is padded with 0 as +// needed. The number of padded rows and columns are computed as: +// Pr = ((R' - 1) * S + K - R) / 2 +// Pc = ((C' - 1) * S + K - C) / 2 +// when the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2. +// This is where SAME comes from - the output has the same size as the input has. +// When Padding = VALID: the output size is computed as +// R' = ceil(float(R - K + 1) / float(S)) +// C' = ceil(float(C - K + 1) / float(S)) +// and the number of padded rows and columns are computed in the same way as in +// the SAME case. +// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0, +// Pc=0. +typedef enum { + PADDING_VALID = 1, + PADDING_SAME = 2 +} PaddingType; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h new file mode 100644 index 0000000..d23f2e4 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -0,0 +1,249 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H +#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H + +namespace Eigen { +namespace internal { + + +template +struct static_val { + static const uint64_t value = n; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { + EIGEN_UNUSED_VARIABLE(v); + eigen_assert(v == n); + } +}; + + +template +struct TensorUInt128 +{ + HIGH high; + LOW low; + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(const TensorUInt128& other) : high(other.high), low(other.low) { + EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128& operator = (const TensorUInt128& other) { + EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); + high = other.high; + low = other.low; + return *this; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + explicit TensorUInt128(const T& x) : high(0), low(x) { + eigen_assert((static_cast::type>(x) <= NumTraits::highest())); + eigen_assert(x >= 0); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(HIGH y, LOW x) : high(y), low(x) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { + return high; + } +}; + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool operator == (const 
TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high == rhs.high) & (lhs.low == rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool operator != (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high != rhs.high) | (lhs.low != rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool operator >= (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high > rhs.high; + } + return lhs.low >= rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool operator < (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high < rhs.high; + } + return lhs.low < rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +TensorUInt128 operator + (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + TensorUInt128 result(lhs.high + rhs.high, lhs.low + rhs.low); + if (result.low < rhs.low) { + result.high += 1; + } + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +TensorUInt128 operator - (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + TensorUInt128 result(lhs.high - rhs.high, lhs.low - rhs.low); + if (result.low > lhs.low) { + result.high -= 1; + } + return result; +} + + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +TensorUInt128 operator * (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + // Split each 128-bit integer into 4 32-bit integers, and then do the + // multiplications by hand as follow: + // lhs a b c d + // rhs e f g h + // ----------- + // ah bh ch dh + // bg cg dg + // cf df + // de + // The result is stored in 2 64bit integers, high and low. + + const uint64_t LOW = 0x00000000FFFFFFFFLL; + const uint64_t HIGH = 0xFFFFFFFF00000000LL; + + uint64_t d = lhs.low & LOW; + uint64_t c = (lhs.low & HIGH) >> 32LL; + uint64_t b = lhs.high & LOW; + uint64_t a = (lhs.high & HIGH) >> 32LL; + + uint64_t h = rhs.low & LOW; + uint64_t g = (rhs.low & HIGH) >> 32LL; + uint64_t f = rhs.high & LOW; + uint64_t e = (rhs.high & HIGH) >> 32LL; + + // Compute the low 32 bits of low + uint64_t acc = d * h; + uint64_t low = acc & LOW; + // Compute the high 32 bits of low. Add a carry every time we wrap around + acc >>= 32LL; + uint64_t carry = 0; + uint64_t acc2 = acc + c * h; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * g; + if (acc < acc2) { + carry++; + } + low |= (acc << 32LL); + + // Carry forward the high bits of acc to initiate the computation of the + // low 32 bits of high + acc2 = (acc >> 32LL) | (carry << 32LL); + carry = 0; + + acc = acc2 + b * h; + if (acc < acc2) { + carry++; + } + acc2 = acc + c * g; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * f; + if (acc < acc2) { + carry++; + } + uint64_t high = acc & LOW; + + // Start to compute the high 32 bits of high. 
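+  // Fold the upper half of the accumulator, together with the carries
+  // counted above, into the next 32-bit limb.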
+ acc2 = (acc >> 32LL) | (carry << 32LL); + + acc = acc2 + a * h; + acc2 = acc + b * g; + acc = acc2 + c * f; + acc2 = acc + d * e; + high |= (acc2 << 32LL); + + return TensorUInt128(high, low); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +TensorUInt128 operator / (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (rhs == TensorUInt128, static_val<1> >(1)) { + return TensorUInt128(lhs.high, lhs.low); + } else if (lhs < rhs) { + return TensorUInt128(0); + } else { + // calculate the biggest power of 2 times rhs that's less than or equal to lhs + TensorUInt128 power2(1); + TensorUInt128 d(rhs); + TensorUInt128 tmp(lhs - d); + while (lhs >= d) { + tmp = tmp - d; + d = d + d; + power2 = power2 + power2; + } + + tmp = TensorUInt128(lhs.high, lhs.low); + TensorUInt128 result(0); + while (power2 != TensorUInt128, static_val<0> >(0)) { + if (tmp >= d) { + tmp = tmp - d; + result = result + power2; + } + // Shift right + power2 = TensorUInt128(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); + d = TensorUInt128(d.high >> 1, (d.low >> 1) | (d.high << 63)); + } + + return result; + } +} + + +} // namespace internal +} // namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/external/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h new file mode 100644 index 0000000..0beb9ff --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -0,0 +1,629 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H + +namespace Eigen { + +/** \class TensorVolumePatch + * \ingroup CXX11_Tensor_Module + * + * \brief Patch extraction specialized for processing of volumetric data. + * This assumes that the input has a least 4 dimensions ordered as follows: + * - channels + * - planes + * - rows + * - columns + * - (optional) additional dimensions such as time or batch size. + * Calling the volume patch code with patch_planes, patch_rows, and patch_cols + * is equivalent to calling the regular patch extraction code with parameters + * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional + * dimensions. 
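+  *
+  * The result gains one dimension holding the patch index: for a ColMajor
+  * input the output is ordered as (channels, patch_planes, patch_rows,
+  * patch_cols, number of patches, remaining dimensions), as laid out in the
+  * evaluator below.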
+ */ +namespace internal { + +template +struct traits > : public traits +{ + typedef typename internal::remove_const::type Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; + typedef typename XprTraits::PointerType PointerType; + +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorVolumePatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorVolumePatchOp type; +}; + +} // end namespace internal + +template +class TensorVolumePatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + PaddingType padding_type, Scalar padding_value) + : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), + m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), + m_padding_type(padding_type), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + DenseIndex padding_top_z, DenseIndex padding_bottom_z, + DenseIndex padding_top, DenseIndex padding_bottom, + DenseIndex padding_left, DenseIndex padding_right, + Scalar padding_value) + : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), + m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), + m_padding_left(padding_left), 
m_padding_right(padding_right), + m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + DenseIndex patch_planes() const { return m_patch_planes; } + EIGEN_DEVICE_FUNC + DenseIndex patch_rows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + DenseIndex patch_cols() const { return m_patch_cols; } + EIGEN_DEVICE_FUNC + DenseIndex plane_strides() const { return m_plane_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_strides() const { return m_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_strides() const { return m_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_plane_strides() const { return m_in_plane_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_row_strides() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_col_strides() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } + EIGEN_DEVICE_FUNC + bool padding_explicit() const { return m_padding_explicit; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top_z() const { return m_padding_top_z; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom_z() const { return m_padding_bottom_z; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top() const { return m_padding_top; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom() const { return m_padding_bottom; } + EIGEN_DEVICE_FUNC + DenseIndex padding_left() const { return m_padding_left; } + EIGEN_DEVICE_FUNC + DenseIndex padding_right() const { return m_padding_right; } + EIGEN_DEVICE_FUNC + PaddingType padding_type() const { return m_padding_type; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const DenseIndex m_patch_planes; + const DenseIndex m_patch_rows; + const DenseIndex m_patch_cols; + const DenseIndex m_plane_strides; + const DenseIndex m_row_strides; + const DenseIndex m_col_strides; + const DenseIndex m_in_plane_strides; + const DenseIndex m_in_row_strides; + const DenseIndex m_in_col_strides; + const DenseIndex m_plane_inflate_strides; + const DenseIndex m_row_inflate_strides; + const DenseIndex m_col_inflate_strides; + const bool m_padding_explicit; + const DenseIndex m_padding_top_z; + const DenseIndex m_padding_bottom_z; + const DenseIndex m_padding_top; + const DenseIndex m_padding_bottom; + const DenseIndex m_padding_left; + const DenseIndex m_padding_right; + const PaddingType m_padding_type; + const Scalar m_padding_value; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorVolumePatchOp XprType; + typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = PacketType::size; + typedef StorageMemory Storage; + typedef typename Storage::Type EvaluatorPointerType; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + PreferBlockAccess = 
TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : + m_impl(op.expression(), device) + { + EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE); + + m_paddingValue = op.padding_value(); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // Cache a few variables. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputDepth = input_dims[0]; + m_inputPlanes = input_dims[1]; + m_inputRows = input_dims[2]; + m_inputCols = input_dims[3]; + } else { + m_inputDepth = input_dims[NumInputDims-1]; + m_inputPlanes = input_dims[NumInputDims-2]; + m_inputRows = input_dims[NumInputDims-3]; + m_inputCols = input_dims[NumInputDims-4]; + } + + m_plane_strides = op.plane_strides(); + m_row_strides = op.row_strides(); + m_col_strides = op.col_strides(); + + // Input strides and effective input/patch size + m_in_plane_strides = op.in_plane_strides(); + m_in_row_strides = op.in_row_strides(); + m_in_col_strides = op.in_col_strides(); + m_plane_inflate_strides = op.plane_inflate_strides(); + m_row_inflate_strides = op.row_inflate_strides(); + m_col_inflate_strides = op.col_inflate_strides(); + + // The "effective" spatial size after inflating data with zeros. + m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1; + m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; + m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; + m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1); + m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); + m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); + + if (op.padding_explicit()) { + m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast(m_plane_strides)); + m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_planePaddingTop = op.padding_top_z(); + m_rowPaddingTop = op.padding_top(); + m_colPaddingLeft = op.padding_left(); + } else { + // Computing padding from the type + switch (op.padding_type()) { + case PADDING_VALID: + m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast(m_plane_strides)); + m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_planePaddingTop = 0; + m_rowPaddingTop = 0; + m_colPaddingLeft = 0; + break; + case PADDING_SAME: { + m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast(m_plane_strides)); + m_outputRows = numext::ceil(m_input_rows_eff / static_cast(m_row_strides)); + m_outputCols = numext::ceil(m_input_cols_eff / static_cast(m_col_strides)); + const Index dz = (m_outputPlanes - 1) * m_plane_strides + m_patch_planes_eff - m_input_planes_eff; + const Index 
dy = (m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff; + const Index dx = (m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff; + m_planePaddingTop = dz / 2; + m_rowPaddingTop = dy / 2; + m_colPaddingLeft = dx / 2; + break; + } + default: + eigen_assert(false && "unexpected padding"); + } + } + eigen_assert(m_outputRows > 0); + eigen_assert(m_outputCols > 0); + eigen_assert(m_outputPlanes > 0); + + // Dimensions for result of extraction. + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_planes + // 2: patch_rows + // 3: patch_cols + // 4: number of patches + // 5 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_planes(); + m_dimensions[2] = op.patch_rows(); + m_dimensions[3] = op.patch_cols(); + m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols; + for (int i = 5; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_planes + // NumDims-3: patch_rows + // NumDims-4: patch_cols + // NumDims-5: number of patches + // NumDims-6 and beyond: anything else (such as batch). + m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_planes(); + m_dimensions[NumDims-3] = op.patch_rows(); + m_dimensions[NumDims-4] = op.patch_cols(); + m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols; + for (int i = NumDims-6; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } + } + + // Strides for the output tensor. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_rowStride = m_dimensions[1]; + m_colStride = m_dimensions[2] * m_rowStride; + m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[4]; + } else { + m_rowStride = m_dimensions[NumDims-2]; + m_colStride = m_dimensions[NumDims-3] * m_rowStride; + m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-5]; + } + + // Strides for navigating through the input tensor. + m_planeInputStride = m_inputDepth; + m_rowInputStride = m_inputDepth * m_inputPlanes; + m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; + m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; + + m_outputPlanesRows = m_outputPlanes * m_outputRows; + + // Fast representations of different variables. 
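+    // TensorIntDivisor precomputes multiply-and-shift reciprocals so the
+    // many index divisions in coeff()/packet() below avoid hardware division.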
+ m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); + + m_fastPatchStride = internal::TensorIntDivisor(m_patchStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + m_fastRowStride = internal::TensorIntDivisor(m_rowStride); + m_fastInputRowStride = internal::TensorIntDivisor(m_row_inflate_strides); + m_fastInputColStride = internal::TensorIntDivisor(m_col_inflate_strides); + m_fastInputPlaneStride = internal::TensorIntDivisor(m_plane_inflate_strides); + m_fastInputColsEff = internal::TensorIntDivisor(m_input_cols_eff); + m_fastOutputPlanes = internal::TensorIntDivisor(m_outputPlanes); + m_fastOutputPlanesRows = internal::TensorIntDivisor(m_outputPlanesRows); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Patch index corresponding to the passed in index. + const Index patchIndex = index / m_fastPatchStride; + + // Spatial offset within the patch. This has to be translated into 3D + // coordinates within the patch. + const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; + + // Batch, etc. + const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride; + const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; + + // Calculate column index in the input original tensor. + const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate row index in the original input tensor. + const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; + const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate plane index in the original input tensor. + const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); + const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; + const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; + const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? 
(inputPlane / m_fastInputPlaneStride) : 0); + if (inputPlane < 0 || inputPlane >= m_input_planes_eff || + ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + + const Index inputIndex = depth + + origInputRow * m_rowInputStride + + origInputCol * m_colInputStride + + origInputPlane * m_planeInputStride + + otherIndex * m_otherInputStride; + + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || + m_in_plane_strides != 1 || m_plane_inflate_strides != 1) { + return packetWithPossibleZero(index); + } + + const Index indices[2] = {index, index + PacketSize - 1}; + const Index patchIndex = indices[0] / m_fastPatchStride; + if (patchIndex != indices[1] / m_fastPatchStride) { + return packetWithPossibleZero(index); + } + const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride; + eigen_assert(otherIndex == indices[1] / m_fastOtherStride); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, + (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; + + const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; + eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); + + const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; + const Index colOffsets[2] = { + patchOffsets[0] / m_fastColStride, + patchOffsets[1] / m_fastColStride}; + + // Calculate col indices in the original input tensor. + const Index inputCols[2] = { + colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft, + colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; + if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputCols[0] != inputCols[1]) { + return packetWithPossibleZero(index); + } + + const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; + const Index rowOffsets[2] = { + (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride, + (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + // Calculate col indices in the original input tensor. 
+ const Index inputRows[2] = { + rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop, + rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; + + if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputRows[0] != inputRows[1]) { + return packetWithPossibleZero(index); + } + + const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); + const Index planeOffsets[2] = { + patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride, + patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride}; + eigen_assert(planeOffsets[0] <= planeOffsets[1]); + const Index inputPlanes[2] = { + planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop, + planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop}; + + if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) { + // no padding + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + const Index inputIndex = depth + + inputRows[0] * m_rowInputStride + + inputCols[0] * m_colInputStride + + m_planeInputStride * inputPlanes[0] + + otherIndex * m_otherInputStride; + return m_impl.template packet(inputIndex); + } + + return packetWithPossibleZero(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = + 10 * TensorOpCost::DivCost() + 21 * TensorOpCost::MulCost() + + 8 * TensorOpCost::AddCost(); + return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index planePaddingTop() const { return m_planePaddingTop; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowPaddingTop() const { return m_rowPaddingTop; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colPaddingLeft() const { return m_colPaddingLeft; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputPlanes() const { return m_outputPlanes; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputRows() const { return m_outputRows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputCols() const { return m_outputCols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userPlaneStride() const { return m_plane_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userRowStride() const { return m_row_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userColStride() const { return m_col_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInPlaneStride() const { return m_in_plane_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInRowStride() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInColStride() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index planeInflateStride() const { return m_plane_inflate_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowInflateStride() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colInflateStride() const { return m_col_inflate_strides; } + +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + + // Parameters passed to the constructor. + Index m_plane_strides; + Index m_row_strides; + Index m_col_strides; + + Index m_outputPlanes; + Index m_outputRows; + Index m_outputCols; + + Index m_planePaddingTop; + Index m_rowPaddingTop; + Index m_colPaddingLeft; + + Index m_in_plane_strides; + Index m_in_row_strides; + Index m_in_col_strides; + + Index m_plane_inflate_strides; + Index m_row_inflate_strides; + Index m_col_inflate_strides; + + // Cached input size. + Index m_inputDepth; + Index m_inputPlanes; + Index m_inputRows; + Index m_inputCols; + + // Other cached variables. + Index m_outputPlanesRows; + + // Effective input/patch post-inflation size. + Index m_input_planes_eff; + Index m_input_rows_eff; + Index m_input_cols_eff; + Index m_patch_planes_eff; + Index m_patch_rows_eff; + Index m_patch_cols_eff; + + // Strides for the output tensor. + Index m_otherStride; + Index m_patchStride; + Index m_rowStride; + Index m_colStride; + + // Strides for the input tensor. + Index m_planeInputStride; + Index m_rowInputStride; + Index m_colInputStride; + Index m_otherInputStride; + + internal::TensorIntDivisor m_fastOtherStride; + internal::TensorIntDivisor m_fastPatchStride; + internal::TensorIntDivisor m_fastColStride; + internal::TensorIntDivisor m_fastRowStride; + internal::TensorIntDivisor m_fastInputPlaneStride; + internal::TensorIntDivisor m_fastInputRowStride; + internal::TensorIntDivisor m_fastInputColStride; + internal::TensorIntDivisor m_fastInputColsEff; + internal::TensorIntDivisor m_fastOutputPlanesRows; + internal::TensorIntDivisor m_fastOutputPlanes; + internal::TensorIntDivisor m_fastOutputDepth; + + Scalar m_paddingValue; + + TensorEvaluator m_impl; + + +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H diff --git a/external/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h new file mode 100644 index 0000000..bc4f202 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
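+
+// Illustrative note (not part of upstream Eigen): a minimal usage sketch of the
+// DynamicSGroup class defined below, assuming the Tensor module is available.
+// The tensor `t` and the assigned value are hypothetical.
+//
+//   Eigen::Tensor<double, 2> t(3, 3);
+//   t.setZero();
+//   Eigen::DynamicSGroup sym;
+//   sym.addAntiSymmetry(0, 1);   // enforce t(i,j) == -t(j,i)
+//   sym(t, 1, 2) = 4.0;          // also writes t(2,1) = -4.0
+//
+// The value setter returned by operator() walks every element of the group and
+// assigns the value to each permuted index, negating and/or conjugating it
+// according to that element's flags.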
+ +#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H +#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H + +namespace Eigen { + +class DynamicSGroup +{ + public: + inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); } + inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { } + inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); } + inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } + inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } + + void add(int one, int two, int flags = 0); + + template + inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); } + inline void addSymmetry(int one, int two) { add(one, two, 0); } + inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); } + inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); } + inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); } + + template + inline RV apply(const std::array& idx, RV initial, Args&&... args) const + { + eigen_assert(N >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); + for (std::size_t i = 0; i < size(); i++) + initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list::type()), m_elements[i].flags, initial, std::forward(args)...); + return initial; + } + + template + inline RV apply(const std::vector& idx, RV initial, Args&&... args) const + { + eigen_assert(idx.size() >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); + for (std::size_t i = 0; i < size(); i++) + initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward(args)...); + return initial; + } + + inline int globalFlags() const { return m_globalFlags; } + inline std::size_t size() const { return m_elements.size(); } + + template + inline internal::tensor_symmetry_value_setter operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... 
otherIndices) const + { + static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + return operator()(tensor, std::array{{firstIndex, otherIndices...}}); + } + + template + inline internal::tensor_symmetry_value_setter operator()(Tensor_& tensor, std::array const& indices) const + { + return internal::tensor_symmetry_value_setter(tensor, *this, indices); + } + private: + struct GroupElement { + std::vector representation; + int flags; + bool isId() const + { + for (std::size_t i = 0; i < representation.size(); i++) + if (i != (size_t)representation[i]) + return false; + return true; + } + }; + struct Generator { + int one; + int two; + int flags; + constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {} + }; + + std::size_t m_numIndices; + std::vector m_elements; + std::vector m_generators; + int m_globalFlags; + + template + inline std::array h_permute(std::size_t which, const std::array& idx, internal::numeric_list) const + { + return std::array{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... }}; + } + + template + inline std::vector h_permute(std::size_t which, std::vector idx) const + { + std::vector result; + result.reserve(idx.size()); + for (auto k : m_elements[which].representation) + result.push_back(idx[k]); + for (std::size_t i = m_numIndices; i < idx.size(); i++) + result.push_back(idx[i]); + return result; + } + + inline GroupElement ge(Generator const& g) const + { + GroupElement result; + result.representation.reserve(m_numIndices); + result.flags = g.flags; + for (std::size_t k = 0; k < m_numIndices; k++) { + if (k == (std::size_t)g.one) + result.representation.push_back(g.two); + else if (k == (std::size_t)g.two) + result.representation.push_back(g.one); + else + result.representation.push_back(int(k)); + } + return result; + } + + GroupElement mul(GroupElement, GroupElement) const; + inline GroupElement mul(Generator g1, GroupElement g2) const + { + return mul(ge(g1), g2); + } + + inline GroupElement mul(GroupElement g1, Generator g2) const + { + return mul(g1, ge(g2)); + } + + inline GroupElement mul(Generator g1, Generator g2) const + { + return mul(ge(g1), ge(g2)); + } + + inline int findElement(GroupElement e) const + { + for (auto ee : m_elements) { + if (ee.representation == e.representation) + return ee.flags ^ e.flags; + } + return -1; + } + + void updateGlobalFlags(int flagDiffOfSameGenerator); +}; + +// dynamic symmetry group that auto-adds the template parameters in the constructor +template +class DynamicSGroupFromTemplateArgs : public DynamicSGroup +{ + public: + inline DynamicSGroupFromTemplateArgs() : DynamicSGroup() + { + add_all(internal::type_list()); + } + inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { } + inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(other) { } + inline DynamicSGroupFromTemplateArgs& operator=(const DynamicSGroupFromTemplateArgs& o) { DynamicSGroup::operator=(o); return *this; } + inline DynamicSGroupFromTemplateArgs& operator=(DynamicSGroupFromTemplateArgs&& o) { DynamicSGroup::operator=(o); return *this; } + + private: + template + inline void add_all(internal::type_list) + { + add(Gen1()); + add_all(internal::type_list()); + } + + inline void add_all(internal::type_list<>) + { + } +}; + +inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, 
GroupElement g2) const +{ + eigen_internal_assert(g1.representation.size() == m_numIndices); + eigen_internal_assert(g2.representation.size() == m_numIndices); + + GroupElement result; + result.representation.reserve(m_numIndices); + for (std::size_t i = 0; i < m_numIndices; i++) { + int v = g2.representation[g1.representation[i]]; + eigen_assert(v >= 0); + result.representation.push_back(v); + } + result.flags = g1.flags ^ g2.flags; + return result; +} + +inline void DynamicSGroup::add(int one, int two, int flags) +{ + eigen_assert(one >= 0); + eigen_assert(two >= 0); + eigen_assert(one != two); + + if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) { + std::size_t newNumIndices = (one > two) ? one : two + 1; + for (auto& gelem : m_elements) { + gelem.representation.reserve(newNumIndices); + for (std::size_t i = m_numIndices; i < newNumIndices; i++) + gelem.representation.push_back(i); + } + m_numIndices = newNumIndices; + } + + Generator g{one, two, flags}; + GroupElement e = ge(g); + + /* special case for first generator */ + if (m_elements.size() == 1) { + while (!e.isId()) { + m_elements.push_back(e); + e = mul(e, g); + } + + if (e.flags > 0) + updateGlobalFlags(e.flags); + + // only add in case we didn't have identity + if (m_elements.size() > 1) + m_generators.push_back(g); + return; + } + + int p = findElement(e); + if (p >= 0) { + updateGlobalFlags(p); + return; + } + + std::size_t coset_order = m_elements.size(); + m_elements.push_back(e); + for (std::size_t i = 1; i < coset_order; i++) + m_elements.push_back(mul(m_elements[i], e)); + m_generators.push_back(g); + + std::size_t coset_rep = coset_order; + do { + for (auto g : m_generators) { + e = mul(m_elements[coset_rep], g); + p = findElement(e); + if (p < 0) { + // element not yet in group + m_elements.push_back(e); + for (std::size_t i = 1; i < coset_order; i++) + m_elements.push_back(mul(m_elements[i], e)); + } else if (p > 0) { + updateGlobalFlags(p); + } + } + coset_rep += coset_order; + } while (coset_rep < m_elements.size()); +} + +inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator) +{ + switch (flagDiffOfSameGenerator) { + case 0: + default: + // nothing happened + break; + case NegationFlag: + // every element is it's own negative => whole tensor is zero + m_globalFlags |= GlobalZeroFlag; + break; + case ConjugationFlag: + // every element is it's own conjugate => whole tensor is real + m_globalFlags |= GlobalRealFlag; + break; + case (NegationFlag | ConjugationFlag): + // every element is it's own negative conjugate => whole tensor is imaginary + m_globalFlags |= GlobalImagFlag; + break; + /* NOTE: + * since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if one generator + * causes the tensor to be real and the next one to be imaginary, this will + * trivially give the correct result + */ + } +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h new file mode 100644 index 0000000..942293b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H +#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H + +namespace Eigen { + +namespace internal { + +template struct tensor_static_symgroup_permutate; + +template +struct tensor_static_symgroup_permutate> +{ + constexpr static std::size_t N = sizeof...(nn); + + template + constexpr static inline std::array run(const std::array& indices) + { + return {{indices[nn]...}}; + } +}; + +template +struct tensor_static_symgroup_element +{ + typedef indices_ indices; + constexpr static int flags = flags_; +}; + +template +struct tensor_static_symgroup_element_ctor +{ + typedef tensor_static_symgroup_element< + typename gen_numeric_list_swapped_pair::type, + Gen::Flags + > type; +}; + +template +struct tensor_static_symgroup_identity_ctor +{ + typedef tensor_static_symgroup_element< + typename gen_numeric_list::type, + 0 + > type; +}; + +template +struct tensor_static_symgroup_multiply_helper +{ + template + constexpr static inline numeric_list::value...> helper(numeric_list) { + return numeric_list::value...>(); + } +}; + +template +struct tensor_static_symgroup_multiply +{ + private: + typedef typename A::indices iia; + typedef typename B::indices iib; + constexpr static int ffa = A::flags; + constexpr static int ffb = B::flags; + + public: + static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices."); + + typedef tensor_static_symgroup_element< + decltype(tensor_static_symgroup_multiply_helper::helper(iia())), + ffa ^ ffb + > type; +}; + +template +struct tensor_static_symgroup_equality +{ + typedef typename A::indices iia; + typedef typename B::indices iib; + constexpr static int ffa = A::flags; + constexpr static int ffb = B::flags; + static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices."); + + constexpr static bool value = is_same::value; + + private: + /* this should be zero if they are identical, or else the tensor + * will be forced to be pure real, pure imaginary or even pure zero + */ + constexpr static int flags_cmp_ = ffa ^ ffb; + + /* either they are not equal, then we don't care whether the flags + * match, or they are equal, and then we have to check + */ + constexpr static bool is_zero = value && flags_cmp_ == NegationFlag; + constexpr static bool is_real = value && flags_cmp_ == ConjugationFlag; + constexpr static bool is_imag = value && flags_cmp_ == (NegationFlag | ConjugationFlag); + + public: + constexpr static int global_flags = + (is_real ? GlobalRealFlag : 0) | + (is_imag ? GlobalImagFlag : 0) | + (is_zero ? GlobalZeroFlag : 0); +}; + +template +struct tensor_static_symgroup +{ + typedef StaticSGroup type; + constexpr static std::size_t size = type::static_size; +}; + +template +constexpr static inline std::array tensor_static_symgroup_index_permute(std::array idx, internal::numeric_list, internal::numeric_list) +{ + return {{ idx[ii]..., idx[jj]... }}; +} + +template +static inline std::vector tensor_static_symgroup_index_permute(std::vector idx, internal::numeric_list) +{ + std::vector result{{ idx[ii]... 
}}; + std::size_t target_size = idx.size(); + for (std::size_t i = result.size(); i < target_size; i++) + result.push_back(idx[i]); + return result; +} + +template struct tensor_static_symgroup_do_apply; + +template +struct tensor_static_symgroup_do_apply> +{ + template + static inline RV run(const std::array& idx, RV initial, Args&&... args) + { + static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices."); + typedef typename internal::gen_numeric_list::type remaining_indices; + initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward(args)...); + return tensor_static_symgroup_do_apply>::template run(idx, initial, args...); + } + + template + static inline RV run(const std::vector& idx, RV initial, Args&&... args) + { + eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); + initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward(args)...); + return tensor_static_symgroup_do_apply>::template run(idx, initial, args...); + } +}; + +template +struct tensor_static_symgroup_do_apply> +{ + template + static inline RV run(const std::array&, RV initial, Args&&...) + { + // do nothing + return initial; + } + + template + static inline RV run(const std::vector&, RV initial, Args&&...) + { + // do nothing + return initial; + } +}; + +} // end namespace internal + +template +class StaticSGroup +{ + constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices::value; + typedef internal::group_theory::enumerate_group_elements< + internal::tensor_static_symgroup_multiply, + internal::tensor_static_symgroup_equality, + typename internal::tensor_static_symgroup_identity_ctor::type, + internal::type_list::type...> + > group_elements; + typedef typename group_elements::type ge; + public: + constexpr inline StaticSGroup() {} + constexpr inline StaticSGroup(const StaticSGroup&) {} + constexpr inline StaticSGroup(StaticSGroup&&) {} + + template + static inline RV apply(const std::array& idx, RV initial, Args&&... args) + { + return internal::tensor_static_symgroup_do_apply::template run(idx, initial, args...); + } + + template + static inline RV apply(const std::vector& idx, RV initial, Args&&... args) + { + eigen_assert(idx.size() == NumIndices); + return internal::tensor_static_symgroup_do_apply::template run(idx, initial, args...); + } + + constexpr static std::size_t static_size = ge::count; + + constexpr static inline std::size_t size() { + return ge::count; + } + constexpr static inline int globalFlags() { return group_elements::global_flags; } + + template + inline internal::tensor_symmetry_value_setter> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... 
otherIndices) const + { + static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + return operator()(tensor, std::array{{firstIndex, otherIndices...}}); + } + + template + inline internal::tensor_symmetry_value_setter> operator()(Tensor_& tensor, std::array const& indices) const + { + return internal::tensor_symmetry_value_setter>(tensor, *this, indices); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h new file mode 100644 index 0000000..879d6cd --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h @@ -0,0 +1,338 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H +#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H + +namespace Eigen { + +enum { + NegationFlag = 0x01, + ConjugationFlag = 0x02 +}; + +enum { + GlobalRealFlag = 0x01, + GlobalImagFlag = 0x02, + GlobalZeroFlag = 0x03 +}; + +namespace internal { + +template struct tensor_symmetry_pre_analysis; +template struct tensor_static_symgroup; +template struct tensor_static_symgroup_if; +template struct tensor_symmetry_calculate_flags; +template struct tensor_symmetry_assign_value; +template struct tensor_symmetry_num_indices; + +} // end namespace internal + +template +struct Symmetry +{ + static_assert(One_ != Two_, "Symmetries must cover distinct indices."); + constexpr static int One = One_; + constexpr static int Two = Two_; + constexpr static int Flags = 0; +}; + +template +struct AntiSymmetry +{ + static_assert(One_ != Two_, "Symmetries must cover distinct indices."); + constexpr static int One = One_; + constexpr static int Two = Two_; + constexpr static int Flags = NegationFlag; +}; + +template +struct Hermiticity +{ + static_assert(One_ != Two_, "Symmetries must cover distinct indices."); + constexpr static int One = One_; + constexpr static int Two = Two_; + constexpr static int Flags = ConjugationFlag; +}; + +template +struct AntiHermiticity +{ + static_assert(One_ != Two_, "Symmetries must cover distinct indices."); + constexpr static int One = One_; + constexpr static int Two = Two_; + constexpr static int Flags = ConjugationFlag | NegationFlag; +}; + +/** \class DynamicSGroup + * \ingroup TensorSymmetry_Module + * + * \brief Dynamic symmetry group + * + * The %DynamicSGroup class represents a symmetry group that need not be known at + * compile time. It is useful if one wants to support arbitrary run-time defineable + * symmetries for tensors, but it is also instantiated if a symmetry group is defined + * at compile time that would be either too large for the compiler to reasonably + * generate (using templates to calculate this at compile time is very inefficient) + * or that the compiler could generate the group but that it wouldn't make sense to + * unroll the loop for setting coefficients anymore. 
+ */ +class DynamicSGroup; + +/** \internal + * + * \class DynamicSGroupFromTemplateArgs + * \ingroup TensorSymmetry_Module + * + * \brief Dynamic symmetry group, initialized from template arguments + * + * This class is a child class of DynamicSGroup. It uses the template arguments + * specified to initialize itself. + */ +template +class DynamicSGroupFromTemplateArgs; + +/** \class StaticSGroup + * \ingroup TensorSymmetry_Module + * + * \brief Static symmetry group + * + * This class represents a symmetry group that is known and resolved completely + * at compile time. Ideally, no run-time penalty is incurred compared to the + * manual unrolling of the symmetry. + * + * CAUTION: + * + * Do not use this class directly for large symmetry groups. The compiler + * may run into a limit, or segfault or in the very least will take a very, + * very, very long time to compile the code. Use the SGroup class instead + * if you want a static group. That class contains logic that will + * automatically select the DynamicSGroup class instead if the symmetry + * group becomes too large. (In that case, unrolling may not even be + * beneficial.) + */ +template +class StaticSGroup; + +/** \class SGroup + * \ingroup TensorSymmetry_Module + * + * \brief Symmetry group, initialized from template arguments + * + * This class represents a symmetry group whose generators are already + * known at compile time. It may or may not be resolved at compile time, + * depending on the estimated size of the group. + * + * \sa StaticSGroup + * \sa DynamicSGroup + */ +template +class SGroup : public internal::tensor_symmetry_pre_analysis::value, Gen...>::root_type +{ + public: + constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices::value; + typedef typename internal::tensor_symmetry_pre_analysis::root_type Base; + + // make standard constructors + assignment operators public + inline SGroup() : Base() { } + inline SGroup(const SGroup& other) : Base(other) { } + inline SGroup(SGroup&& other) : Base(other) { } + inline SGroup& operator=(const SGroup& other) { Base::operator=(other); return *this; } + inline SGroup& operator=(SGroup&& other) { Base::operator=(other); return *this; } + + // all else is defined in the base class +}; + +namespace internal { + +template struct tensor_symmetry_num_indices +{ + constexpr static std::size_t value = 1; +}; + +template struct tensor_symmetry_num_indices, Sym...> +{ +private: + constexpr static std::size_t One = static_cast(One_); + constexpr static std::size_t Two = static_cast(Two_); + constexpr static std::size_t Three = tensor_symmetry_num_indices::value; + + // don't use std::max, since it's not constexpr until C++14... + constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1; +public: + constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? 
maxOneTwoPlusOne : Three; +}; + +template struct tensor_symmetry_num_indices, Sym...> + : public tensor_symmetry_num_indices, Sym...> {}; +template struct tensor_symmetry_num_indices, Sym...> + : public tensor_symmetry_num_indices, Sym...> {}; +template struct tensor_symmetry_num_indices, Sym...> + : public tensor_symmetry_num_indices, Sym...> {}; + +/** \internal + * + * \class tensor_symmetry_pre_analysis + * \ingroup TensorSymmetry_Module + * + * \brief Pre-select whether to use a static or dynamic symmetry group + * + * When a symmetry group could in principle be determined at compile time, + * this template implements the logic whether to actually do that or whether + * to rather defer that to runtime. + * + * The logic is as follows: + *
+ *   No generators (trivial symmetry):
+ *     Use a trivial static group. Ideally, this has no performance impact
+ *     compared to not using symmetry at all. In practice, this might not
+ *     be the case.
+ *   More than 4 generators:
+ *     Calculate the group at run time, it is likely far too large for the
+ *     compiler to be able to properly generate it in a realistic time.
+ *   Up to and including 4 generators:
+ *     Actually enumerate all group elements, but then check how many there
+ *     are. If there are more than 16, it is unlikely that unrolling the
+ *     loop (as is done in the static compile-time case) is sensible, so
+ *     use a dynamic group instead. If there are at most 16 elements, actually
+ *     use that static group. Note that the largest group with 4 generators
+ *     still compiles with reasonable resources.
+ * + * Note: Example compile time performance with g++-4.6 on an Intenl Core i5-3470 + * with 16 GiB RAM (all generators non-redundant and the subgroups don't + * factorize): + * + * # Generators -O0 -ggdb -O2 + * ------------------------------------------------------------------- + * 1 0.5 s / 250 MiB 0.45s / 230 MiB + * 2 0.5 s / 260 MiB 0.5 s / 250 MiB + * 3 0.65s / 310 MiB 0.62s / 310 MiB + * 4 2.2 s / 860 MiB 1.7 s / 770 MiB + * 5 130 s / 13000 MiB 120 s / 11000 MiB + * + * It is clear that everything is still very efficient up to 4 generators, then + * the memory and CPU requirements become unreasonable. Thus we only instantiate + * the template group theory logic if the number of generators supplied is 4 or + * lower, otherwise this will be forced to be done during runtime, where the + * algorithm is reasonably fast. + */ +template +struct tensor_symmetry_pre_analysis +{ + typedef StaticSGroup<> root_type; +}; + +template +struct tensor_symmetry_pre_analysis +{ + constexpr static std::size_t max_static_generators = 4; + constexpr static std::size_t max_static_elements = 16; + typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper; + constexpr static std::size_t possible_size = helper::size; + + typedef typename conditional< + possible_size == 0 || possible_size >= max_static_elements, + DynamicSGroupFromTemplateArgs, + typename helper::type + >::type root_type; +}; + +template +struct tensor_static_symgroup_if +{ + constexpr static std::size_t size = 0; + typedef void type; +}; + +template +struct tensor_static_symgroup_if : tensor_static_symgroup {}; + +template +struct tensor_symmetry_assign_value +{ + typedef typename Tensor_::Index Index; + typedef typename Tensor_::Scalar Scalar; + constexpr static std::size_t NumIndices = Tensor_::NumIndices; + + static inline int run(const std::array& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_) + { + Scalar value(value_); + if (transformation_flags & ConjugationFlag) + value = numext::conj(value); + if (transformation_flags & NegationFlag) + value = -value; + tensor.coeffRef(transformed_indices) = value; + return dummy; + } +}; + +template +struct tensor_symmetry_calculate_flags +{ + typedef typename Tensor_::Index Index; + constexpr static std::size_t NumIndices = Tensor_::NumIndices; + + static inline int run(const std::array& transformed_indices, int transform_flags, int current_flags, const std::array& orig_indices) + { + if (transformed_indices == orig_indices) { + if (transform_flags & (ConjugationFlag | NegationFlag)) + return current_flags | GlobalImagFlag; // anti-hermitian diagonal + else if (transform_flags & ConjugationFlag) + return current_flags | GlobalRealFlag; // hermitian diagonal + else if (transform_flags & NegationFlag) + return current_flags | GlobalZeroFlag; // anti-symmetric diagonal + } + return current_flags; + } +}; + +template +class tensor_symmetry_value_setter +{ + public: + typedef typename Tensor_::Index Index; + typedef typename Tensor_::Scalar Scalar; + constexpr static std::size_t NumIndices = Tensor_::NumIndices; + + inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array const& indices) + : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { } + + inline tensor_symmetry_value_setter& operator=(Scalar const& value) + { + doAssign(value); + return *this; + } + private: + Tensor_& m_tensor; + Symmetry_ m_symmetry; + std::array m_indices; + + inline 
void doAssign(Scalar const& value) + { + #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES + int value_flags = m_symmetry.template apply, int>(m_indices, m_symmetry.globalFlags(), m_indices); + if (value_flags & GlobalRealFlag) + eigen_assert(numext::imag(value) == 0); + if (value_flags & GlobalImagFlag) + eigen_assert(numext::real(value) == 0); + #endif + m_symmetry.template apply, int>(m_indices, 0, m_tensor, value); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h new file mode 100644 index 0000000..54bf9db --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h @@ -0,0 +1,669 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H +#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H + +namespace Eigen { + +namespace internal { + +namespace group_theory { + +/** \internal + * \file CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h + * This file contains C++ templates that implement group theory algorithms. + * + * The algorithms allow for a compile-time analysis of finite groups. + * + * Currently only Dimino's algorithm is implemented, which returns a list + * of all elements in a group given a set of (possibly redundant) generators. + * (One could also do that with the so-called orbital algorithm, but that + * is much more expensive and usually has no advantages.) + */ + +/********************************************************************** + * "Ok kid, here is where it gets complicated." + * - Amelia Pond in the "Doctor Who" episode + * "The Big Bang" + * + * Dimino's algorithm + * ================== + * + * The following is Dimino's algorithm in sequential form: + * + * Input: identity element, list of generators, equality check, + * multiplication operation + * Output: list of group elements + * + * 1. add identity element + * 2. remove identities from list of generators + * 3. add all powers of first generator that aren't the + * identity element + * 4. go through all remaining generators: + * a. if generator is already in the list of elements + * -> do nothing + * b. otherwise + * i. remember current # of elements + * (i.e. the size of the current subgroup) + * ii. add all current elements (which includes + * the identity) each multiplied from right + * with the current generator to the group + * iii. add all remaining cosets that are generated + * by products of the new generator with itself + * and all other generators seen so far + * + * In functional form, this is implemented as a long set of recursive + * templates that have a complicated relationship. + * + * The main interface for Dimino's algorithm is the template + * enumerate_group_elements. All lists are implemented as variadic + * type_list and numeric_list + * templates. + * + * 'Calling' templates is usually done via typedefs. + * + * This algorithm is an extended version of the basic version. 
The + * extension consists in the fact that each group element has a set + * of flags associated with it. Multiplication of two group elements + * with each other results in a group element whose flags are the + * XOR of the flags of the previous elements. Each time the algorithm + * notices that a group element it just calculated is already in the + * list of current elements, the flags of both will be compared and + * added to the so-called 'global flags' of the group. + * + * The rationale behind this extension is that this allows not only + * for the description of symmetries between tensor indices, but + * also allows for the description of hermiticity, antisymmetry and + * antihermiticity. Negation and conjugation each are specific bit + * in the flags value and if two different ways to reach a group + * element lead to two different flags, this poses a constraint on + * the allowed values of the resulting tensor. For example, if a + * group element is reach both with and without the conjugation + * flags, it is clear that the resulting tensor has to be real. + * + * Note that this flag mechanism is quite generic and may have other + * uses beyond tensor properties. + * + * IMPORTANT: + * This algorithm assumes the group to be finite. If you try to + * run it with a group that's infinite, the algorithm will only + * terminate once you hit a compiler limit (max template depth). + * Also note that trying to use this implementation to create a + * very large group will probably either make you hit the same + * limit, cause the compiler to segfault or at the very least + * take a *really* long time (hours, days, weeks - sic!) to + * compile. It is not recommended to plug in more than 4 + * generators, unless they are independent of each other. + */ + +/** \internal + * + * \class strip_identities + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Cleanse a list of group elements of the identity element + * + * This template is used to make a first pass through all initial + * generators of Dimino's algorithm and remove the identity + * elements. + * + * \sa enumerate_group_elements + */ +template class Equality, typename id, typename L> struct strip_identities; + +template< + template class Equality, + typename id, + typename t, + typename... ts +> +struct strip_identities> +{ + typedef typename conditional< + Equality::value, + typename strip_identities>::type, + typename concat, typename strip_identities>::type>::type + >::type type; + constexpr static int global_flags = Equality::global_flags | strip_identities>::global_flags; +}; + +template< + template class Equality, + typename id + EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts) +> +struct strip_identities> +{ + typedef type_list<> type; + constexpr static int global_flags = 0; +}; + +/** \internal + * + * \class dimino_first_step_elements_helper + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Recursive template that adds powers of the first generator to the list of group elements + * + * This template calls itself recursively to add powers of the first + * generator to the list of group elements. It stops if it reaches + * the identity element again. 
+ * + * \sa enumerate_group_elements, dimino_first_step_elements + */ +template< + template class Multiply, + template class Equality, + typename id, + typename g, + typename current_element, + typename elements, + bool dont_add_current_element // = false +> +struct dimino_first_step_elements_helper +#ifndef EIGEN_PARSED_BY_DOXYGEN + : // recursive inheritance is too difficult for Doxygen + public dimino_first_step_elements_helper< + Multiply, + Equality, + id, + g, + typename Multiply::type, + typename concat>::type, + Equality::type, id>::value + > {}; + +template< + template class Multiply, + template class Equality, + typename id, + typename g, + typename current_element, + typename elements +> +struct dimino_first_step_elements_helper +#endif // EIGEN_PARSED_BY_DOXYGEN +{ + typedef elements type; + constexpr static int global_flags = Equality::global_flags; +}; + +/** \internal + * + * \class dimino_first_step_elements + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Add all powers of the first generator to the list of group elements + * + * This template takes the first non-identity generator and generates the initial + * list of elements which consists of all powers of that generator. For a group + * with just one generated, it would be enumerated after this. + * + * \sa enumerate_group_elements + */ +template< + template class Multiply, + template class Equality, + typename id, + typename generators +> +struct dimino_first_step_elements +{ + typedef typename get<0, generators>::type first_generator; + typedef typename skip<1, generators>::type next_generators; + typedef type_list generators_done; + + typedef dimino_first_step_elements_helper< + Multiply, + Equality, + id, + first_generator, + first_generator, + type_list, + false + > helper; + typedef typename helper::type type; + constexpr static int global_flags = helper::global_flags; +}; + +/** \internal + * + * \class dimino_get_coset_elements + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Generate all elements of a specific coset + * + * This template generates all the elements of a specific coset by + * multiplying all elements in the given subgroup with the new + * coset representative. Note that the first element of the + * subgroup is always the identity element, so the first element of + * the result of this template is going to be the coset + * representative itself. + * + * Note that this template accepts an additional boolean parameter + * that specifies whether to actually generate the coset (true) or + * just return an empty list (false). + * + * \sa enumerate_group_elements, dimino_add_cosets_for_rep + */ +template< + template class Multiply, + typename sub_group_elements, + typename new_coset_rep, + bool generate_coset // = true +> +struct dimino_get_coset_elements +{ + typedef typename apply_op_from_right::type type; +}; + +template< + template class Multiply, + typename sub_group_elements, + typename new_coset_rep +> +struct dimino_get_coset_elements +{ + typedef type_list<> type; +}; + +/** \internal + * + * \class dimino_add_cosets_for_rep + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Recursive template for adding coset spaces + * + * This template multiplies the coset representative with a generator + * from the list of previous generators. If the new element is not in + * the group already, it adds the corresponding coset. Finally it + * proceeds to call itself with the next generator from the list. 
+ * + * \sa enumerate_group_elements, dimino_add_all_coset_spaces + */ +template< + template class Multiply, + template class Equality, + typename id, + typename sub_group_elements, + typename elements, + typename generators, + typename rep_element, + int sub_group_size +> +struct dimino_add_cosets_for_rep; + +template< + template class Multiply, + template class Equality, + typename id, + typename sub_group_elements, + typename elements, + typename g, + typename... gs, + typename rep_element, + int sub_group_size +> +struct dimino_add_cosets_for_rep, rep_element, sub_group_size> +{ + typedef typename Multiply::type new_coset_rep; + typedef contained_in_list_gf _cil; + constexpr static bool add_coset = !_cil::value; + + typedef typename dimino_get_coset_elements< + Multiply, + sub_group_elements, + new_coset_rep, + add_coset + >::type coset_elements; + + typedef dimino_add_cosets_for_rep< + Multiply, + Equality, + id, + sub_group_elements, + typename concat::type, + type_list, + rep_element, + sub_group_size + > _helper; + + typedef typename _helper::type type; + constexpr static int global_flags = _cil::global_flags | _helper::global_flags; + + /* Note that we don't have to update global flags here, since + * we will only add these elements if they are not part of + * the group already. But that only happens if the coset rep + * is not already in the group, so the check for the coset rep + * will catch this. + */ +}; + +template< + template class Multiply, + template class Equality, + typename id, + typename sub_group_elements, + typename elements + EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), + typename rep_element, + int sub_group_size +> +struct dimino_add_cosets_for_rep, rep_element, sub_group_size> +{ + typedef elements type; + constexpr static int global_flags = 0; +}; + +/** \internal + * + * \class dimino_add_all_coset_spaces + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Recursive template for adding all coset spaces for a new generator + * + * This template tries to go through the list of generators (with + * the help of the dimino_add_cosets_for_rep template) as long as + * it still finds elements that are not part of the group and add + * the corresponding cosets. 
+ * + * \sa enumerate_group_elements, dimino_add_cosets_for_rep + */ +template< + template class Multiply, + template class Equality, + typename id, + typename sub_group_elements, + typename elements, + typename generators, + int sub_group_size, + int rep_pos, + bool stop_condition // = false +> +struct dimino_add_all_coset_spaces +{ + typedef typename get::type rep_element; + typedef dimino_add_cosets_for_rep< + Multiply, + Equality, + id, + sub_group_elements, + elements, + generators, + rep_element, + sub_group_elements::count + > _ac4r; + typedef typename _ac4r::type new_elements; + + constexpr static int new_rep_pos = rep_pos + sub_group_elements::count; + constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count; + + typedef dimino_add_all_coset_spaces< + Multiply, + Equality, + id, + sub_group_elements, + new_elements, + generators, + sub_group_size, + new_rep_pos, + new_stop_condition + > _helper; + + typedef typename _helper::type type; + constexpr static int global_flags = _helper::global_flags | _ac4r::global_flags; +}; + +template< + template class Multiply, + template class Equality, + typename id, + typename sub_group_elements, + typename elements, + typename generators, + int sub_group_size, + int rep_pos +> +struct dimino_add_all_coset_spaces +{ + typedef elements type; + constexpr static int global_flags = 0; +}; + +/** \internal + * + * \class dimino_add_generator + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Enlarge the group by adding a new generator. + * + * It accepts a boolean parameter that determines if the generator is redundant, + * i.e. was already seen in the group. In that case, it reduces to a no-op. + * + * \sa enumerate_group_elements, dimino_add_all_coset_spaces + */ +template< + template class Multiply, + template class Equality, + typename id, + typename elements, + typename generators_done, + typename current_generator, + bool redundant // = false +> +struct dimino_add_generator +{ + /* this template is only called if the generator is not redundant + * => all elements of the group multiplied with the new generator + * are going to be new elements of the most trivial coset space + */ + typedef typename apply_op_from_right::type multiplied_elements; + typedef typename concat::type new_elements; + + constexpr static int rep_pos = elements::count; + + typedef dimino_add_all_coset_spaces< + Multiply, + Equality, + id, + elements, // elements of previous subgroup + new_elements, + typename concat>::type, + elements::count, // size of previous subgroup + rep_pos, + false // don't stop (because rep_pos >= new_elements::count is always false at this point) + > _helper; + typedef typename _helper::type type; + constexpr static int global_flags = _helper::global_flags; +}; + +template< + template class Multiply, + template class Equality, + typename id, + typename elements, + typename generators_done, + typename current_generator +> +struct dimino_add_generator +{ + // redundant case + typedef elements type; + constexpr static int global_flags = 0; +}; + +/** \internal + * + * \class dimino_add_remaining_generators + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Recursive template that adds all remaining generators to a group + * + * Loop through the list of generators that remain and successively + * add them to the group. 
+ * + * \sa enumerate_group_elements, dimino_add_generator + */ +template< + template class Multiply, + template class Equality, + typename id, + typename generators_done, + typename remaining_generators, + typename elements +> +struct dimino_add_remaining_generators +{ + typedef typename get<0, remaining_generators>::type first_generator; + typedef typename skip<1, remaining_generators>::type next_generators; + + typedef contained_in_list_gf _cil; + + typedef dimino_add_generator< + Multiply, + Equality, + id, + elements, + generators_done, + first_generator, + _cil::value + > _helper; + + typedef typename _helper::type new_elements; + + typedef dimino_add_remaining_generators< + Multiply, + Equality, + id, + typename concat>::type, + next_generators, + new_elements + > _next_iter; + + typedef typename _next_iter::type type; + constexpr static int global_flags = + _cil::global_flags | + _helper::global_flags | + _next_iter::global_flags; +}; + +template< + template class Multiply, + template class Equality, + typename id, + typename generators_done, + typename elements +> +struct dimino_add_remaining_generators, elements> +{ + typedef elements type; + constexpr static int global_flags = 0; +}; + +/** \internal + * + * \class enumerate_group_elements_noid + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Helper template that implements group element enumeration + * + * This is a helper template that implements the actual enumeration + * of group elements. This has been split so that the list of + * generators can be cleansed of the identity element before + * performing the actual operation. + * + * \sa enumerate_group_elements + */ +template< + template class Multiply, + template class Equality, + typename id, + typename generators, + int initial_global_flags = 0 +> +struct enumerate_group_elements_noid +{ + typedef dimino_first_step_elements first_step; + typedef typename first_step::type first_step_elements; + + typedef dimino_add_remaining_generators< + Multiply, + Equality, + id, + typename first_step::generators_done, + typename first_step::next_generators, // remaining_generators + typename first_step::type // first_step elements + > _helper; + + typedef typename _helper::type type; + constexpr static int global_flags = + initial_global_flags | + first_step::global_flags | + _helper::global_flags; +}; + +// in case when no generators are specified +template< + template class Multiply, + template class Equality, + typename id, + int initial_global_flags +> +struct enumerate_group_elements_noid, initial_global_flags> +{ + typedef type_list type; + constexpr static int global_flags = initial_global_flags; +}; + +/** \internal + * + * \class enumerate_group_elements + * \ingroup CXX11_TensorSymmetry_Module + * + * \brief Enumerate all elements in a finite group + * + * This template enumerates all elements in a finite group. It accepts + * the following template parameters: + * + * \tparam Multiply The multiplication operation that multiplies two group elements + * with each other. + * \tparam Equality The equality check operation that checks if two group elements + * are equal to another. 
+ * \tparam id The identity element + * \tparam _generators A list of (possibly redundant) generators of the group + */ +template< + template class Multiply, + template class Equality, + typename id, + typename _generators +> +struct enumerate_group_elements + : public enumerate_group_elements_noid< + Multiply, + Equality, + id, + typename strip_identities::type, + strip_identities::global_flags + > +{ +}; + +} // end namespace group_theory + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h new file mode 100644 index 0000000..e4c59dc --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h @@ -0,0 +1,67 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2018 Rasmus Munk Larsen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Barrier is an object that allows one or more threads to wait until +// Notify has been called a specified number of times. + +#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H +#define EIGEN_CXX11_THREADPOOL_BARRIER_H + +namespace Eigen { + +class Barrier { + public: + Barrier(unsigned int count) : state_(count << 1), notified_(false) { + eigen_plain_assert(((count << 1) >> 1) == count); + } + ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); } + + void Notify() { + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + // Clear the lowest bit (waiter flag) and check that the original state + // value was not zero. If it was zero, it means that notify was called + // more times than the original count. + eigen_plain_assert(((v + 2) & ~1) != 0); + return; // either count has not dropped to 0, or waiter is not waiting + } + std::unique_lock l(mu_); + eigen_plain_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; + std::unique_lock l(mu_); + while (!notified_) { + cv_.wait(l); + } + } + + private: + std::mutex mu_; + std::condition_variable cv_; + std::atomic state_; // low bit is waiter flag + bool notified_; +}; + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object, +// but only one caller must call Notify() on the object. +struct Notification : Barrier { + Notification() : Barrier(1){}; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_BARRIER_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h new file mode 100644 index 0000000..4549aa0 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h @@ -0,0 +1,249 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ +#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ + +namespace Eigen { + +// EventCount allows to wait for arbitrary predicates in non-blocking +// algorithms. Think of condition variable, but wait predicate does not need to +// be protected by a mutex. Usage: +// Waiting thread does: +// +// if (predicate) +// return act(); +// EventCount::Waiter& w = waiters[my_index]; +// ec.Prewait(&w); +// if (predicate) { +// ec.CancelWait(&w); +// return act(); +// } +// ec.CommitWait(&w); +// +// Notifying thread does: +// +// predicate = true; +// ec.Notify(true); +// +// Notify is cheap if there are no waiting threads. Prewait/CommitWait are not +// cheap, but they are executed only if the preceding predicate check has +// failed. +// +// Algorithm outline: +// There are two main variables: predicate (managed by user) and state_. +// Operation closely resembles Dekker mutual algorithm: +// https://en.wikipedia.org/wiki/Dekker%27s_algorithm +// Waiting thread sets state_ then checks predicate, Notifying thread sets +// predicate then checks state_. Due to seq_cst fences in between these +// operations it is guaranteed than either waiter will see predicate change +// and won't block, or notifying thread will see state_ change and will unblock +// the waiter, or both. But it can't happen that both threads don't see each +// other changes, which would lead to deadlock. +class EventCount { + public: + class Waiter; + + EventCount(MaxSizeVector& waiters) + : state_(kStackMask), waiters_(waiters) { + eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1); + } + + ~EventCount() { + // Ensure there are no waiters. + eigen_plain_assert(state_.load() == kStackMask); + } + + // Prewait prepares for waiting. + // After calling Prewait, the thread must re-check the wait predicate + // and then call either CancelWait or CommitWait. + void Prewait() { + uint64_t state = state_.load(std::memory_order_relaxed); + for (;;) { + CheckState(state); + uint64_t newstate = state + kWaiterInc; + CheckState(newstate); + if (state_.compare_exchange_weak(state, newstate, + std::memory_order_seq_cst)) + return; + } + } + + // CommitWait commits waiting after Prewait. + void CommitWait(Waiter* w) { + eigen_plain_assert((w->epoch & ~kEpochMask) == 0); + w->state = Waiter::kNotSignaled; + const uint64_t me = (w - &waiters_[0]) | w->epoch; + uint64_t state = state_.load(std::memory_order_seq_cst); + for (;;) { + CheckState(state, true); + uint64_t newstate; + if ((state & kSignalMask) != 0) { + // Consume the signal and return immidiately. + newstate = state - kWaiterInc - kSignalInc; + } else { + // Remove this thread from pre-wait counter and add to the waiter stack. + newstate = ((state & kWaiterMask) - kWaiterInc) | me; + w->next.store(state & (kStackMask | kEpochMask), + std::memory_order_relaxed); + } + CheckState(newstate); + if (state_.compare_exchange_weak(state, newstate, + std::memory_order_acq_rel)) { + if ((state & kSignalMask) == 0) { + w->epoch += kEpochInc; + Park(w); + } + return; + } + } + } + + // CancelWait cancels effects of the previous Prewait call. + void CancelWait() { + uint64_t state = state_.load(std::memory_order_relaxed); + for (;;) { + CheckState(state, true); + uint64_t newstate = state - kWaiterInc; + // We don't know if the thread was also notified or not, + // so we should not consume a signal unconditionaly. 
+ // Only if number of waiters is equal to number of signals, + // we know that the thread was notified and we must take away the signal. + if (((state & kWaiterMask) >> kWaiterShift) == + ((state & kSignalMask) >> kSignalShift)) + newstate -= kSignalInc; + CheckState(newstate); + if (state_.compare_exchange_weak(state, newstate, + std::memory_order_acq_rel)) + return; + } + } + + // Notify wakes one or all waiting threads. + // Must be called after changing the associated wait predicate. + void Notify(bool notifyAll) { + std::atomic_thread_fence(std::memory_order_seq_cst); + uint64_t state = state_.load(std::memory_order_acquire); + for (;;) { + CheckState(state); + const uint64_t waiters = (state & kWaiterMask) >> kWaiterShift; + const uint64_t signals = (state & kSignalMask) >> kSignalShift; + // Easy case: no waiters. + if ((state & kStackMask) == kStackMask && waiters == signals) return; + uint64_t newstate; + if (notifyAll) { + // Empty wait stack and set signal to number of pre-wait threads. + newstate = + (state & kWaiterMask) | (waiters << kSignalShift) | kStackMask; + } else if (signals < waiters) { + // There is a thread in pre-wait state, unblock it. + newstate = state + kSignalInc; + } else { + // Pop a waiter from list and unpark it. + Waiter* w = &waiters_[state & kStackMask]; + uint64_t next = w->next.load(std::memory_order_relaxed); + newstate = (state & (kWaiterMask | kSignalMask)) | next; + } + CheckState(newstate); + if (state_.compare_exchange_weak(state, newstate, + std::memory_order_acq_rel)) { + if (!notifyAll && (signals < waiters)) + return; // unblocked pre-wait thread + if ((state & kStackMask) == kStackMask) return; + Waiter* w = &waiters_[state & kStackMask]; + if (!notifyAll) w->next.store(kStackMask, std::memory_order_relaxed); + Unpark(w); + return; + } + } + } + + class Waiter { + friend class EventCount; + // Align to 128 byte boundary to prevent false sharing with other Waiter + // objects in the same vector. + EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic next; + std::mutex mu; + std::condition_variable cv; + uint64_t epoch = 0; + unsigned state = kNotSignaled; + enum { + kNotSignaled, + kWaiting, + kSignaled, + }; + }; + + private: + // State_ layout: + // - low kWaiterBits is a stack of waiters committed wait + // (indexes in waiters_ array are used as stack elements, + // kStackMask means empty stack). + // - next kWaiterBits is count of waiters in prewait state. + // - next kWaiterBits is count of pending signals. + // - remaining bits are ABA counter for the stack. + // (stored in Waiter node and incremented on push). 
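For illustration, the packed 64-bit state word described above decodes field by field. A minimal standalone sketch, assuming the same 14-bit field widths as the constants defined just below (the helper name is hypothetical, not part of Eigen):

#include <cstdint>
#include <cstdio>

namespace {
// Layout, low bits to high bits: waiter-stack top | prewait count | signal count | epoch.
constexpr uint64_t kFieldBits = 14;
constexpr uint64_t kFieldMask = (1ull << kFieldBits) - 1;

void DumpEventCountState(uint64_t state) {
  std::printf("stack top: %llu  prewait: %llu  signals: %llu  epoch: %llu\n",
              (unsigned long long)(state & kFieldMask),                        // committed waiter stack
              (unsigned long long)((state >> kFieldBits) & kFieldMask),        // threads in Prewait
              (unsigned long long)((state >> (2 * kFieldBits)) & kFieldMask),  // pending signals
              (unsigned long long)(state >> (3 * kFieldBits)));                // ABA epoch counter
}
}  // namespace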
+ static const uint64_t kWaiterBits = 14; + static const uint64_t kStackMask = (1ull << kWaiterBits) - 1; + static const uint64_t kWaiterShift = kWaiterBits; + static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1) + << kWaiterShift; + static const uint64_t kWaiterInc = 1ull << kWaiterShift; + static const uint64_t kSignalShift = 2 * kWaiterBits; + static const uint64_t kSignalMask = ((1ull << kWaiterBits) - 1) + << kSignalShift; + static const uint64_t kSignalInc = 1ull << kSignalShift; + static const uint64_t kEpochShift = 3 * kWaiterBits; + static const uint64_t kEpochBits = 64 - kEpochShift; + static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift; + static const uint64_t kEpochInc = 1ull << kEpochShift; + std::atomic state_; + MaxSizeVector& waiters_; + + static void CheckState(uint64_t state, bool waiter = false) { + static_assert(kEpochBits >= 20, "not enough bits to prevent ABA problem"); + const uint64_t waiters = (state & kWaiterMask) >> kWaiterShift; + const uint64_t signals = (state & kSignalMask) >> kSignalShift; + eigen_plain_assert(waiters >= signals); + eigen_plain_assert(waiters < (1 << kWaiterBits) - 1); + eigen_plain_assert(!waiter || waiters > 0); + (void)waiters; + (void)signals; + } + + void Park(Waiter* w) { + std::unique_lock lock(w->mu); + while (w->state != Waiter::kSignaled) { + w->state = Waiter::kWaiting; + w->cv.wait(lock); + } + } + + void Unpark(Waiter* w) { + for (Waiter* next; w; w = next) { + uint64_t wnext = w->next.load(std::memory_order_relaxed) & kStackMask; + next = wnext == kStackMask ? nullptr : &waiters_[wnext]; + unsigned state; + { + std::unique_lock lock(w->mu); + state = w->state; + w->state = Waiter::kSignaled; + } + // Avoid notifying if it wasn't waiting. + if (state == Waiter::kWaiting) w->cv.notify_one(); + } + } + + EventCount(const EventCount&) = delete; + void operator=(const EventCount&) = delete; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h new file mode 100644 index 0000000..23a2b54 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -0,0 +1,486 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H +#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H + +namespace Eigen { + +template +class ThreadPoolTempl : public Eigen::ThreadPoolInterface { + public: + typedef typename Environment::Task Task; + typedef RunQueue Queue; + + ThreadPoolTempl(int num_threads, Environment env = Environment()) + : ThreadPoolTempl(num_threads, true, env) {} + + ThreadPoolTempl(int num_threads, bool allow_spinning, + Environment env = Environment()) + : env_(env), + num_threads_(num_threads), + allow_spinning_(allow_spinning), + thread_data_(num_threads), + all_coprimes_(num_threads), + waiters_(num_threads), + global_steal_partition_(EncodePartition(0, num_threads_)), + blocked_(0), + spinning_(0), + done_(false), + cancelled_(false), + ec_(waiters_) { + waiters_.resize(num_threads_); + // Calculate coprimes of all numbers [1, num_threads]. 
+ // Coprimes are used for random walks over all threads in Steal + // and NonEmptyQueueIndex. Iteration is based on the fact that if we take + // a random starting thread index t and calculate num_threads - 1 subsequent + // indices as (t + coprime) % num_threads, we will cover all threads without + // repetitions (effectively getting a presudo-random permutation of thread + // indices). + eigen_plain_assert(num_threads_ < kMaxThreads); + for (int i = 1; i <= num_threads_; ++i) { + all_coprimes_.emplace_back(i); + ComputeCoprimes(i, &all_coprimes_.back()); + } +#ifndef EIGEN_THREAD_LOCAL + init_barrier_.reset(new Barrier(num_threads_)); +#endif + thread_data_.resize(num_threads_); + for (int i = 0; i < num_threads_; i++) { + SetStealPartition(i, EncodePartition(0, num_threads_)); + thread_data_[i].thread.reset( + env_.CreateThread([this, i]() { WorkerLoop(i); })); + } +#ifndef EIGEN_THREAD_LOCAL + // Wait for workers to initialize per_thread_map_. Otherwise we might race + // with them in Schedule or CurrentThreadId. + init_barrier_->Wait(); +#endif + } + + ~ThreadPoolTempl() { + done_ = true; + + // Now if all threads block without work, they will start exiting. + // But note that threads can continue to work arbitrary long, + // block, submit new work, unblock and otherwise live full life. + if (!cancelled_) { + ec_.Notify(true); + } else { + // Since we were cancelled, there might be entries in the queues. + // Empty them to prevent their destructor from asserting. + for (size_t i = 0; i < thread_data_.size(); i++) { + thread_data_[i].queue.Flush(); + } + } + // Join threads explicitly (by destroying) to avoid destruction order within + // this class. + for (size_t i = 0; i < thread_data_.size(); ++i) + thread_data_[i].thread.reset(); + } + + void SetStealPartitions(const std::vector>& partitions) { + eigen_plain_assert(partitions.size() == static_cast(num_threads_)); + + // Pass this information to each thread queue. + for (int i = 0; i < num_threads_; i++) { + const auto& pair = partitions[i]; + unsigned start = pair.first, end = pair.second; + AssertBounds(start, end); + unsigned val = EncodePartition(start, end); + SetStealPartition(i, val); + } + } + + void Schedule(std::function fn) EIGEN_OVERRIDE { + ScheduleWithHint(std::move(fn), 0, num_threads_); + } + + void ScheduleWithHint(std::function fn, int start, + int limit) override { + Task t = env_.CreateTask(std::move(fn)); + PerThread* pt = GetPerThread(); + if (pt->pool == this) { + // Worker thread of this pool, push onto the thread's queue. + Queue& q = thread_data_[pt->thread_id].queue; + t = q.PushFront(std::move(t)); + } else { + // A free-standing thread (or worker of another pool), push onto a random + // queue. + eigen_plain_assert(start < limit); + eigen_plain_assert(limit <= num_threads_); + int num_queues = limit - start; + int rnd = Rand(&pt->rand) % num_queues; + eigen_plain_assert(start + rnd < limit); + Queue& q = thread_data_[start + rnd].queue; + t = q.PushBack(std::move(t)); + } + // Note: below we touch this after making w available to worker threads. + // Strictly speaking, this can lead to a racy-use-after-free. Consider that + // Schedule is called from a thread that is neither main thread nor a worker + // thread of this pool. Then, execution of w directly or indirectly + // completes overall computations, which in turn leads to destruction of + // this. We expect that such scenario is prevented by program, that is, + // this is kept alive while any threads can potentially be in Schedule. 
+ if (!t.f) { + ec_.Notify(false); + } else { + env_.ExecuteTask(t); // Push failed, execute directly. + } + } + + void Cancel() EIGEN_OVERRIDE { + cancelled_ = true; + done_ = true; + + // Let each thread know it's been cancelled. +#ifdef EIGEN_THREAD_ENV_SUPPORTS_CANCELLATION + for (size_t i = 0; i < thread_data_.size(); i++) { + thread_data_[i].thread->OnCancel(); + } +#endif + + // Wake up the threads without work to let them exit on their own. + ec_.Notify(true); + } + + int NumThreads() const EIGEN_FINAL { return num_threads_; } + + int CurrentThreadId() const EIGEN_FINAL { + const PerThread* pt = const_cast(this)->GetPerThread(); + if (pt->pool == this) { + return pt->thread_id; + } else { + return -1; + } + } + + private: + // Create a single atomic that encodes start and limit information for + // each thread. + // We expect num_threads_ < 65536, so we can store them in a single + // std::atomic. + // Exposed publicly as static functions so that external callers can reuse + // this encode/decode logic for maintaining their own thread-safe copies of + // scheduling and steal domain(s). + static const int kMaxPartitionBits = 16; + static const int kMaxThreads = 1 << kMaxPartitionBits; + + inline unsigned EncodePartition(unsigned start, unsigned limit) { + return (start << kMaxPartitionBits) | limit; + } + + inline void DecodePartition(unsigned val, unsigned* start, unsigned* limit) { + *limit = val & (kMaxThreads - 1); + val >>= kMaxPartitionBits; + *start = val; + } + + void AssertBounds(int start, int end) { + eigen_plain_assert(start >= 0); + eigen_plain_assert(start < end); // non-zero sized partition + eigen_plain_assert(end <= num_threads_); + } + + inline void SetStealPartition(size_t i, unsigned val) { + thread_data_[i].steal_partition.store(val, std::memory_order_relaxed); + } + + inline unsigned GetStealPartition(int i) { + return thread_data_[i].steal_partition.load(std::memory_order_relaxed); + } + + void ComputeCoprimes(int N, MaxSizeVector* coprimes) { + for (int i = 1; i <= N; i++) { + unsigned a = i; + unsigned b = N; + // If GCD(a, b) == 1, then a and b are coprimes. + while (b != 0) { + unsigned tmp = a; + a = b; + b = tmp % b; + } + if (a == 1) { + coprimes->push_back(i); + } + } + } + + typedef typename Environment::EnvThread Thread; + + struct PerThread { + constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) {} + ThreadPoolTempl* pool; // Parent pool, or null for normal threads. + uint64_t rand; // Random generator state. + int thread_id; // Worker thread index in pool. +#ifndef EIGEN_THREAD_LOCAL + // Prevent false sharing. + char pad_[128]; +#endif + }; + + struct ThreadData { + constexpr ThreadData() : thread(), steal_partition(0), queue() {} + std::unique_ptr thread; + std::atomic steal_partition; + Queue queue; + }; + + Environment env_; + const int num_threads_; + const bool allow_spinning_; + MaxSizeVector thread_data_; + MaxSizeVector> all_coprimes_; + MaxSizeVector waiters_; + unsigned global_steal_partition_; + std::atomic blocked_; + std::atomic spinning_; + std::atomic done_; + std::atomic cancelled_; + EventCount ec_; +#ifndef EIGEN_THREAD_LOCAL + std::unique_ptr init_barrier_; + std::mutex per_thread_map_mutex_; // Protects per_thread_map_. + std::unordered_map> per_thread_map_; +#endif + + // Main worker thread loop. 
+ void WorkerLoop(int thread_id) { +#ifndef EIGEN_THREAD_LOCAL + std::unique_ptr new_pt(new PerThread()); + per_thread_map_mutex_.lock(); + bool insertOK = per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second; + eigen_plain_assert(insertOK); + EIGEN_UNUSED_VARIABLE(insertOK); + per_thread_map_mutex_.unlock(); + init_barrier_->Notify(); + init_barrier_->Wait(); +#endif + PerThread* pt = GetPerThread(); + pt->pool = this; + pt->rand = GlobalThreadIdHash(); + pt->thread_id = thread_id; + Queue& q = thread_data_[thread_id].queue; + EventCount::Waiter* waiter = &waiters_[thread_id]; + // TODO(dvyukov,rmlarsen): The time spent in NonEmptyQueueIndex() is + // proportional to num_threads_ and we assume that new work is scheduled at + // a constant rate, so we set spin_count to 5000 / num_threads_. The + // constant was picked based on a fair dice roll, tune it. + const int spin_count = + allow_spinning_ && num_threads_ > 0 ? 5000 / num_threads_ : 0; + if (num_threads_ == 1) { + // For num_threads_ == 1 there is no point in going through the expensive + // steal loop. Moreover, since NonEmptyQueueIndex() calls PopBack() on the + // victim queues it might reverse the order in which ops are executed + // compared to the order in which they are scheduled, which tends to be + // counter-productive for the types of I/O workloads the single thread + // pools tend to be used for. + while (!cancelled_) { + Task t = q.PopFront(); + for (int i = 0; i < spin_count && !t.f; i++) { + if (!cancelled_.load(std::memory_order_relaxed)) { + t = q.PopFront(); + } + } + if (!t.f) { + if (!WaitForWork(waiter, &t)) { + return; + } + } + if (t.f) { + env_.ExecuteTask(t); + } + } + } else { + while (!cancelled_) { + Task t = q.PopFront(); + if (!t.f) { + t = LocalSteal(); + if (!t.f) { + t = GlobalSteal(); + if (!t.f) { + // Leave one thread spinning. This reduces latency. + if (allow_spinning_ && !spinning_ && !spinning_.exchange(true)) { + for (int i = 0; i < spin_count && !t.f; i++) { + if (!cancelled_.load(std::memory_order_relaxed)) { + t = GlobalSteal(); + } else { + return; + } + } + spinning_ = false; + } + if (!t.f) { + if (!WaitForWork(waiter, &t)) { + return; + } + } + } + } + } + if (t.f) { + env_.ExecuteTask(t); + } + } + } + } + + // Steal tries to steal work from other worker threads in the range [start, + // limit) in best-effort manner. + Task Steal(unsigned start, unsigned limit) { + PerThread* pt = GetPerThread(); + const size_t size = limit - start; + unsigned r = Rand(&pt->rand); + // Reduce r into [0, size) range, this utilizes trick from + // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + eigen_plain_assert(all_coprimes_[size - 1].size() < (1<<30)); + unsigned victim = ((uint64_t)r * (uint64_t)size) >> 32; + unsigned index = ((uint64_t) all_coprimes_[size - 1].size() * (uint64_t)r) >> 32; + unsigned inc = all_coprimes_[size - 1][index]; + + for (unsigned i = 0; i < size; i++) { + eigen_plain_assert(start + victim < limit); + Task t = thread_data_[start + victim].queue.PopBack(); + if (t.f) { + return t; + } + victim += inc; + if (victim >= size) { + victim -= size; + } + } + return Task(); + } + + // Steals work within threads belonging to the partition. + Task LocalSteal() { + PerThread* pt = GetPerThread(); + unsigned partition = GetStealPartition(pt->thread_id); + // If thread steal partition is the same as global partition, there is no + // need to go through the steal loop twice. 
+ if (global_steal_partition_ == partition) return Task(); + unsigned start, limit; + DecodePartition(partition, &start, &limit); + AssertBounds(start, limit); + + return Steal(start, limit); + } + + // Steals work from any other thread in the pool. + Task GlobalSteal() { + return Steal(0, num_threads_); + } + + + // WaitForWork blocks until new work is available (returns true), or if it is + // time to exit (returns false). Can optionally return a task to execute in t + // (in such case t.f != nullptr on return). + bool WaitForWork(EventCount::Waiter* waiter, Task* t) { + eigen_plain_assert(!t->f); + // We already did best-effort emptiness check in Steal, so prepare for + // blocking. + ec_.Prewait(); + // Now do a reliable emptiness check. + int victim = NonEmptyQueueIndex(); + if (victim != -1) { + ec_.CancelWait(); + if (cancelled_) { + return false; + } else { + *t = thread_data_[victim].queue.PopBack(); + return true; + } + } + // Number of blocked threads is used as termination condition. + // If we are shutting down and all worker threads blocked without work, + // that's we are done. + blocked_++; + // TODO is blocked_ required to be unsigned? + if (done_ && blocked_ == static_cast(num_threads_)) { + ec_.CancelWait(); + // Almost done, but need to re-check queues. + // Consider that all queues are empty and all worker threads are preempted + // right after incrementing blocked_ above. Now a free-standing thread + // submits work and calls destructor (which sets done_). If we don't + // re-check queues, we will exit leaving the work unexecuted. + if (NonEmptyQueueIndex() != -1) { + // Note: we must not pop from queues before we decrement blocked_, + // otherwise the following scenario is possible. Consider that instead + // of checking for emptiness we popped the only element from queues. + // Now other worker threads can start exiting, which is bad if the + // work item submits other work. So we just check emptiness here, + // which ensures that all worker threads exit at the same time. + blocked_--; + return true; + } + // Reached stable termination state. + ec_.Notify(true); + return false; + } + ec_.CommitWait(waiter); + blocked_--; + return true; + } + + int NonEmptyQueueIndex() { + PerThread* pt = GetPerThread(); + // We intentionally design NonEmptyQueueIndex to steal work from + // anywhere in the queue so threads don't block in WaitForWork() forever + // when all threads in their partition go to sleep. Steal is still local. 
+ const size_t size = thread_data_.size(); + unsigned r = Rand(&pt->rand); + unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()]; + unsigned victim = r % size; + for (unsigned i = 0; i < size; i++) { + if (!thread_data_[victim].queue.Empty()) { + return victim; + } + victim += inc; + if (victim >= size) { + victim -= size; + } + } + return -1; + } + + static EIGEN_STRONG_INLINE uint64_t GlobalThreadIdHash() { + return std::hash()(std::this_thread::get_id()); + } + + EIGEN_STRONG_INLINE PerThread* GetPerThread() { +#ifndef EIGEN_THREAD_LOCAL + static PerThread dummy; + auto it = per_thread_map_.find(GlobalThreadIdHash()); + if (it == per_thread_map_.end()) { + return &dummy; + } else { + return it->second.get(); + } +#else + EIGEN_THREAD_LOCAL PerThread per_thread_; + PerThread* pt = &per_thread_; + return pt; +#endif + } + + static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> + (22 + (current >> 61))); + } +}; + +typedef ThreadPoolTempl ThreadPool; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h new file mode 100644 index 0000000..b572ebc --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ +#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ + +namespace Eigen { + +// RunQueue is a fixed-size, partially non-blocking deque or Work items. +// Operations on front of the queue must be done by a single thread (owner), +// operations on back of the queue can be done by multiple threads concurrently. +// +// Algorithm outline: +// All remote threads operating on the queue back are serialized by a mutex. +// This ensures that at most two threads access state: owner and one remote +// thread (Size aside). The algorithm ensures that the occupied region of the +// underlying array is logically continuous (can wraparound, but no stray +// occupied elements). Owner operates on one end of this region, remote thread +// operates on the other end. Synchronization between these threads +// (potential consumption of the last element and take up of the last empty +// element) happens by means of state variable in each element. States are: +// empty, busy (in process of insertion of removal) and ready. Threads claim +// elements (empty->busy and ready->busy transitions) by means of a CAS +// operation. The finishing transition (busy->empty and busy->ready) are done +// with plain store as the element is exclusively owned by the current thread. +// +// Note: we could permit only pointers as elements, then we would not need +// separate state variable as null/non-null pointer value would serve as state, +// but that would require malloc/free per operation for large, complex values +// (and this is designed to store std::function<()>). 
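Before the class itself, a minimal standalone sketch of the per-element claim/publish protocol outlined above: elements are claimed with a CAS (empty->busy, ready->busy) and released with a plain store once exclusively owned. The names here are hypothetical; the real queue below additionally manages the front_/back_ indices and a mutex for the back end.

#include <atomic>
#include <functional>
#include <utility>

// One slot following the same state protocol as a RunQueue element.
struct Slot {
  enum : uint8_t { kEmpty, kBusy, kReady };
  std::atomic<uint8_t> state{kEmpty};
  std::function<void()> work;

  // Producer: claim (empty -> busy), fill, publish (busy -> ready).
  bool TryPush(std::function<void()> w) {
    uint8_t s = kEmpty;
    if (!state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
      return false;                      // slot is occupied or being modified
    work = std::move(w);                 // exclusive owner while kBusy
    state.store(kReady, std::memory_order_release);
    return true;
  }

  // Consumer: claim (ready -> busy), take the work, release (busy -> empty).
  bool TryPop(std::function<void()>* out) {
    uint8_t s = kReady;
    if (!state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
      return false;
    *out = std::move(work);
    state.store(kEmpty, std::memory_order_release);
    return true;
  }
};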
+template +class RunQueue { + public: + RunQueue() : front_(0), back_(0) { + // require power-of-two for fast masking + eigen_plain_assert((kSize & (kSize - 1)) == 0); + eigen_plain_assert(kSize > 2); // why would you do this? + eigen_plain_assert(kSize <= (64 << 10)); // leave enough space for counter + for (unsigned i = 0; i < kSize; i++) + array_[i].state.store(kEmpty, std::memory_order_relaxed); + } + + ~RunQueue() { eigen_plain_assert(Size() == 0); } + + // PushFront inserts w at the beginning of the queue. + // If queue is full returns w, otherwise returns default-constructed Work. + Work PushFront(Work w) { + unsigned front = front_.load(std::memory_order_relaxed); + Elem* e = &array_[front & kMask]; + uint8_t s = e->state.load(std::memory_order_relaxed); + if (s != kEmpty || + !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) + return w; + front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed); + e->w = std::move(w); + e->state.store(kReady, std::memory_order_release); + return Work(); + } + + // PopFront removes and returns the first element in the queue. + // If the queue was empty returns default-constructed Work. + Work PopFront() { + unsigned front = front_.load(std::memory_order_relaxed); + Elem* e = &array_[(front - 1) & kMask]; + uint8_t s = e->state.load(std::memory_order_relaxed); + if (s != kReady || + !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) + return Work(); + Work w = std::move(e->w); + e->state.store(kEmpty, std::memory_order_release); + front = ((front - 1) & kMask2) | (front & ~kMask2); + front_.store(front, std::memory_order_relaxed); + return w; + } + + // PushBack adds w at the end of the queue. + // If queue is full returns w, otherwise returns default-constructed Work. + Work PushBack(Work w) { + std::unique_lock lock(mutex_); + unsigned back = back_.load(std::memory_order_relaxed); + Elem* e = &array_[(back - 1) & kMask]; + uint8_t s = e->state.load(std::memory_order_relaxed); + if (s != kEmpty || + !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) + return w; + back = ((back - 1) & kMask2) | (back & ~kMask2); + back_.store(back, std::memory_order_relaxed); + e->w = std::move(w); + e->state.store(kReady, std::memory_order_release); + return Work(); + } + + // PopBack removes and returns the last elements in the queue. + Work PopBack() { + if (Empty()) return Work(); + std::unique_lock lock(mutex_); + unsigned back = back_.load(std::memory_order_relaxed); + Elem* e = &array_[back & kMask]; + uint8_t s = e->state.load(std::memory_order_relaxed); + if (s != kReady || + !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) + return Work(); + Work w = std::move(e->w); + e->state.store(kEmpty, std::memory_order_release); + back_.store(back + 1 + (kSize << 1), std::memory_order_relaxed); + return w; + } + + // PopBackHalf removes and returns half last elements in the queue. + // Returns number of elements removed. 
+ unsigned PopBackHalf(std::vector* result) { + if (Empty()) return 0; + std::unique_lock lock(mutex_); + unsigned back = back_.load(std::memory_order_relaxed); + unsigned size = Size(); + unsigned mid = back; + if (size > 1) mid = back + (size - 1) / 2; + unsigned n = 0; + unsigned start = 0; + for (; static_cast(mid - back) >= 0; mid--) { + Elem* e = &array_[mid & kMask]; + uint8_t s = e->state.load(std::memory_order_relaxed); + if (n == 0) { + if (s != kReady || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) + continue; + start = mid; + } else { + // Note: no need to store temporal kBusy, we exclusively own these + // elements. + eigen_plain_assert(s == kReady); + } + result->push_back(std::move(e->w)); + e->state.store(kEmpty, std::memory_order_release); + n++; + } + if (n != 0) + back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed); + return n; + } + + // Size returns current queue size. + // Can be called by any thread at any time. + unsigned Size() const { return SizeOrNotEmpty(); } + + // Empty tests whether container is empty. + // Can be called by any thread at any time. + bool Empty() const { return SizeOrNotEmpty() == 0; } + + // Delete all the elements from the queue. + void Flush() { + while (!Empty()) { + PopFront(); + } + } + + private: + static const unsigned kMask = kSize - 1; + static const unsigned kMask2 = (kSize << 1) - 1; + struct Elem { + std::atomic state; + Work w; + }; + enum { + kEmpty, + kBusy, + kReady, + }; + std::mutex mutex_; + // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of + // front/back, respectively. The remaining bits contain modification counters + // that are incremented on Push operations. This allows us to (1) distinguish + // between empty and full conditions (if we would use log(kSize) bits for + // position, these conditions would be indistinguishable); (2) obtain + // consistent snapshot of front_/back_ for Size operation using the + // modification counters. + std::atomic front_; + std::atomic back_; + Elem array_[kSize]; + + // SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false, + // only whether the size is 0 is guaranteed to be correct. + // Can be called by any thread at any time. + template + unsigned SizeOrNotEmpty() const { + // Emptiness plays critical role in thread pool blocking. So we go to great + // effort to not produce false positives (claim non-empty queue as empty). + unsigned front = front_.load(std::memory_order_acquire); + for (;;) { + // Capture a consistent snapshot of front/tail. + unsigned back = back_.load(std::memory_order_acquire); + unsigned front1 = front_.load(std::memory_order_relaxed); + if (front != front1) { + front = front1; + std::atomic_thread_fence(std::memory_order_acquire); + continue; + } + if (NeedSizeEstimate) { + return CalculateSize(front, back); + } else { + // This value will be 0 if the queue is empty, and undefined otherwise. + unsigned maybe_zero = ((front ^ back) & kMask2); + // Queue size estimate must agree with maybe zero check on the queue + // empty/non-empty state. + eigen_assert((CalculateSize(front, back) == 0) == (maybe_zero == 0)); + return maybe_zero; + } + } + } + + EIGEN_ALWAYS_INLINE + unsigned CalculateSize(unsigned front, unsigned back) const { + int size = (front & kMask2) - (back & kMask2); + // Fix overflow. + if (size < 0) size += 2 * kSize; + // Order of modification in push/pop is crafted to make the queue look + // larger than it is during concurrent modifications. E.g. 
push can + // increment size before the corresponding pop has decremented it. + // So the computed size can be up to kSize + 1, fix it. + if (size > static_cast(kSize)) size = kSize; + return static_cast(size); + } + + RunQueue(const RunQueue&) = delete; + void operator=(const RunQueue&) = delete; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h new file mode 100644 index 0000000..a05685f --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h @@ -0,0 +1,23 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_THREAD_CANCEL_H +#define EIGEN_CXX11_THREADPOOL_THREAD_CANCEL_H + +// Try to come up with a portable way to cancel a thread +#if EIGEN_OS_GNULINUX + #define EIGEN_THREAD_CANCEL(t) \ + pthread_cancel(t.native_handle()); + #define EIGEN_SUPPORTS_THREAD_CANCELLATION 1 +#else +#define EIGEN_THREAD_CANCEL(t) +#endif + + +#endif // EIGEN_CXX11_THREADPOOL_THREAD_CANCEL_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h new file mode 100644 index 0000000..d94a064 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H +#define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H + +namespace Eigen { + +struct StlThreadEnvironment { + struct Task { + std::function f; + }; + + // EnvThread constructor must start the thread, + // destructor must join the thread. + class EnvThread { + public: + EnvThread(std::function f) : thr_(std::move(f)) {} + ~EnvThread() { thr_.join(); } + // This function is called when the threadpool is cancelled. + void OnCancel() { } + + private: + std::thread thr_; + }; + + EnvThread* CreateThread(std::function f) { return new EnvThread(std::move(f)); } + Task CreateTask(std::function f) { return Task{std::move(f)}; } + void ExecuteTask(const Task& t) { t.f(); } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h new file mode 100644 index 0000000..4e68474 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h @@ -0,0 +1,301 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
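Putting the pieces above together (ThreadPoolTempl over StlThreadEnvironment, plus Barrier), a minimal usage sketch; the include path is an assumption about how these vendored headers are exposed:

#include <atomic>
#include <cstdio>
#include <unsupported/Eigen/CXX11/ThreadPool>  // assumed umbrella header for the vendored files

int main() {
  Eigen::ThreadPool pool(4);             // ThreadPoolTempl<StlThreadEnvironment>
  std::atomic<int> sum(0);
  Eigen::Barrier barrier(8);             // released after 8 Notify() calls

  for (int i = 1; i <= 8; ++i) {
    pool.Schedule([i, &sum, &barrier]() {
      sum.fetch_add(i, std::memory_order_relaxed);
      barrier.Notify();
    });
  }
  barrier.Wait();                        // block until every task has run
  std::printf("threads=%d sum=%d\n", pool.NumThreads(), sum.load());
  return 0;
}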
+ +#ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H +#define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H + +#ifdef EIGEN_AVOID_THREAD_LOCAL + +#ifdef EIGEN_THREAD_LOCAL +#undef EIGEN_THREAD_LOCAL +#endif + +#else + +#if EIGEN_MAX_CPP_VER >= 11 && \ + ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \ + __has_feature(cxx_thread_local) || \ + (EIGEN_COMP_MSVC >= 1900) ) +#define EIGEN_THREAD_LOCAL static thread_local +#endif + +// Disable TLS for Apple and Android builds with older toolchains. +#if defined(__APPLE__) +// Included for TARGET_OS_IPHONE, __IPHONE_OS_VERSION_MIN_REQUIRED, +// __IPHONE_8_0. +#include +#include +#endif +// Checks whether C++11's `thread_local` storage duration specifier is +// supported. +#if defined(__apple_build_version__) && \ + ((__apple_build_version__ < 8000042) || \ + (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0)) +// Notes: Xcode's clang did not support `thread_local` until version +// 8, and even then not for all iOS < 9.0. +#undef EIGEN_THREAD_LOCAL + +#elif defined(__ANDROID__) && EIGEN_COMP_CLANG +// There are platforms for which TLS should not be used even though the compiler +// makes it seem like it's supported (Android NDK < r12b for example). +// This is primarily because of linker problems and toolchain misconfiguration: +// TLS isn't supported until NDK r12b per +// https://developer.android.com/ndk/downloads/revision_history.html +// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in +// . For NDK < r16, users should define these macros, +// e.g. `-D__NDK_MAJOR__=11 -D__NKD_MINOR__=0` for NDK r11. +#if __has_include() +#include +#endif // __has_include() +#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \ + defined(__NDK_MINOR__) && \ + ((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1))) +#undef EIGEN_THREAD_LOCAL +#endif +#endif // defined(__ANDROID__) && defined(__clang__) + +#endif // EIGEN_AVOID_THREAD_LOCAL + +namespace Eigen { + +namespace internal { +template +struct ThreadLocalNoOpInitialize { + void operator()(T&) const {} +}; + +template +struct ThreadLocalNoOpRelease { + void operator()(T&) const {} +}; + +} // namespace internal + +// Thread local container for elements of type T, that does not use thread local +// storage. As long as the number of unique threads accessing this storage +// is smaller than `capacity_`, it is lock-free and wait-free. Otherwise it will +// use a mutex for synchronization. +// +// Type `T` has to be default constructible, and by default each thread will get +// a default constructed value. It is possible to specify custom `initialize` +// callable, that will be called lazily from each thread accessing this object, +// and will be passed a default initialized object of type `T`. Also it's +// possible to pass a custom `release` callable, that will be invoked before +// calling ~T(). +// +// Example: +// +// struct Counter { +// int value = 0; +// } +// +// Eigen::ThreadLocal counter(10); +// +// // Each thread will have access to it's own counter object. +// Counter& cnt = counter.local(); +// cnt++; +// +// WARNING: Eigen::ThreadLocal uses the OS-specific value returned by +// std::this_thread::get_id() to identify threads. This value is not guaranteed +// to be unique except for the life of the thread. A newly created thread may +// get an OS-specific ID equal to that of an already destroyed thread. 
+// +// Somewhat similar to TBB thread local storage, with similar restrictions: +// https://www.threadingbuildingblocks.org/docs/help/reference/thread_local_storage/enumerable_thread_specific_cls.html +// +template , + typename Release = internal::ThreadLocalNoOpRelease> +class ThreadLocal { + // We preallocate default constructed elements in MaxSizedVector. + static_assert(std::is_default_constructible::value, + "ThreadLocal data type must be default constructible"); + + public: + explicit ThreadLocal(int capacity) + : ThreadLocal(capacity, internal::ThreadLocalNoOpInitialize(), + internal::ThreadLocalNoOpRelease()) {} + + ThreadLocal(int capacity, Initialize initialize) + : ThreadLocal(capacity, std::move(initialize), + internal::ThreadLocalNoOpRelease()) {} + + ThreadLocal(int capacity, Initialize initialize, Release release) + : initialize_(std::move(initialize)), + release_(std::move(release)), + capacity_(capacity), + data_(capacity_), + ptr_(capacity_), + filled_records_(0) { + eigen_assert(capacity_ >= 0); + data_.resize(capacity_); + for (int i = 0; i < capacity_; ++i) { + ptr_.emplace_back(nullptr); + } + } + + T& local() { + std::thread::id this_thread = std::this_thread::get_id(); + if (capacity_ == 0) return SpilledLocal(this_thread); + + std::size_t h = std::hash()(this_thread); + const int start_idx = h % capacity_; + + // NOTE: From the definition of `std::this_thread::get_id()` it is + // guaranteed that we never can have concurrent insertions with the same key + // to our hash-map like data structure. If we didn't find an element during + // the initial traversal, it's guaranteed that no one else could have + // inserted it while we are in this function. This allows to massively + // simplify out lock-free insert-only hash map. + + // Check if we already have an element for `this_thread`. + int idx = start_idx; + while (ptr_[idx].load() != nullptr) { + ThreadIdAndValue& record = *(ptr_[idx].load()); + if (record.thread_id == this_thread) return record.value; + + idx += 1; + if (idx >= capacity_) idx -= capacity_; + if (idx == start_idx) break; + } + + // If we are here, it means that we found an insertion point in lookup + // table at `idx`, or we did a full traversal and table is full. + + // If lock-free storage is full, fallback on mutex. + if (filled_records_.load() >= capacity_) return SpilledLocal(this_thread); + + // We double check that we still have space to insert an element into a lock + // free storage. If old value in `filled_records_` is larger than the + // records capacity, it means that some other thread added an element while + // we were traversing lookup table. + int insertion_index = + filled_records_.fetch_add(1, std::memory_order_relaxed); + if (insertion_index >= capacity_) return SpilledLocal(this_thread); + + // At this point it's guaranteed that we can access to + // data_[insertion_index_] without a data race. + data_[insertion_index].thread_id = this_thread; + initialize_(data_[insertion_index].value); + + // That's the pointer we'll put into the lookup table. + ThreadIdAndValue* inserted = &data_[insertion_index]; + + // We'll use nullptr pointer to ThreadIdAndValue in a compare-and-swap loop. + ThreadIdAndValue* empty = nullptr; + + // Now we have to find an insertion point into the lookup table. We start + // from the `idx` that was identified as an insertion point above, it's + // guaranteed that we will have an empty record somewhere in a lookup table + // (because we created a record in the `data_`). 
+ const int insertion_idx = idx; + + do { + // Always start search from the original insertion candidate. + idx = insertion_idx; + while (ptr_[idx].load() != nullptr) { + idx += 1; + if (idx >= capacity_) idx -= capacity_; + // If we did a full loop, it means that we don't have any free entries + // in the lookup table, and this means that something is terribly wrong. + eigen_assert(idx != insertion_idx); + } + // Atomic CAS of the pointer guarantees that any other thread, that will + // follow this pointer will see all the mutations in the `data_`. + } while (!ptr_[idx].compare_exchange_weak(empty, inserted)); + + return inserted->value; + } + + // WARN: It's not thread safe to call it concurrently with `local()`. + void ForEach(std::function f) { + // Reading directly from `data_` is unsafe, because only CAS to the + // record in `ptr_` makes all changes visible to other threads. + for (auto& ptr : ptr_) { + ThreadIdAndValue* record = ptr.load(); + if (record == nullptr) continue; + f(record->thread_id, record->value); + } + + // We did not spill into the map based storage. + if (filled_records_.load(std::memory_order_relaxed) < capacity_) return; + + // Adds a happens before edge from the last call to SpilledLocal(). + std::unique_lock lock(mu_); + for (auto& kv : per_thread_map_) { + f(kv.first, kv.second); + } + } + + // WARN: It's not thread safe to call it concurrently with `local()`. + ~ThreadLocal() { + // Reading directly from `data_` is unsafe, because only CAS to the record + // in `ptr_` makes all changes visible to other threads. + for (auto& ptr : ptr_) { + ThreadIdAndValue* record = ptr.load(); + if (record == nullptr) continue; + release_(record->value); + } + + // We did not spill into the map based storage. + if (filled_records_.load(std::memory_order_relaxed) < capacity_) return; + + // Adds a happens before edge from the last call to SpilledLocal(). + std::unique_lock lock(mu_); + for (auto& kv : per_thread_map_) { + release_(kv.second); + } + } + + private: + struct ThreadIdAndValue { + std::thread::id thread_id; + T value; + }; + + // Use unordered map guarded by a mutex when lock free storage is full. + T& SpilledLocal(std::thread::id this_thread) { + std::unique_lock lock(mu_); + + auto it = per_thread_map_.find(this_thread); + if (it == per_thread_map_.end()) { + auto result = per_thread_map_.emplace(this_thread, T()); + eigen_assert(result.second); + initialize_((*result.first).second); + return (*result.first).second; + } else { + return it->second; + } + } + + Initialize initialize_; + Release release_; + const int capacity_; + + // Storage that backs lock-free lookup table `ptr_`. Records stored in this + // storage contiguously starting from index 0. + MaxSizeVector data_; + + // Atomic pointers to the data stored in `data_`. Used as a lookup table for + // linear probing hash map (https://en.wikipedia.org/wiki/Linear_probing). + MaxSizeVector> ptr_; + + // Number of records stored in the `data_`. + std::atomic filled_records_; + + // We fallback on per thread map if lock-free storage is full. In practice + // this should never happen, if `capacity_` is a reasonable estimate of the + // number of threads running in a system. + std::mutex mu_; // Protects per_thread_map_. 
+ std::unordered_map per_thread_map_; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h new file mode 100644 index 0000000..25030dc --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H +#define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H + +namespace Eigen { + +// This defines an interface that ThreadPoolDevice can take to use +// custom thread pools underneath. +class ThreadPoolInterface { + public: + // Submits a closure to be run by a thread in the pool. + virtual void Schedule(std::function fn) = 0; + + // Submits a closure to be run by threads in the range [start, end) in the + // pool. + virtual void ScheduleWithHint(std::function fn, int /*start*/, + int /*end*/) { + // Just defer to Schedule in case sub-classes aren't interested in + // overriding this functionality. + Schedule(fn); + } + + // If implemented, stop processing the closures that have been enqueued. + // Currently running closures may still be processed. + // If not implemented, does nothing. + virtual void Cancel() {} + + // Returns the number of threads in the pool. + virtual int NumThreads() const = 0; + + // Returns a logical thread index between 0 and NumThreads() - 1 if called + // from one of the threads in the pool. Returns -1 otherwise. + virtual int CurrentThreadId() const = 0; + + virtual ~ThreadPoolInterface() {} +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H diff --git a/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h new file mode 100644 index 0000000..a859c7b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h @@ -0,0 +1,20 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H +#define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H + +// Try to come up with a portable way to yield +#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) +#define EIGEN_THREAD_YIELD() sched_yield() +#else +#define EIGEN_THREAD_YIELD() std::this_thread::yield() +#endif + +#endif // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H diff --git a/external/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/external/unsupported/Eigen/CXX11/src/util/CXX11Meta.h new file mode 100644 index 0000000..149ceaf --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/util/CXX11Meta.h @@ -0,0 +1,537 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11META_H +#define EIGEN_CXX11META_H + +#include +#include "EmulateArray.h" + +#include "CXX11Workarounds.h" + +namespace Eigen { + +namespace internal { + +/** \internal + * \file CXX11/util/CXX11Meta.h + * This file contains generic metaprogramming classes which are not specifically related to Eigen. + * This file expands upon Core/util/Meta.h and adds support for C++11 specific features. + */ + +template +struct type_list { constexpr static int count = sizeof...(tt); }; + +template +struct type_list { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; }; + +template +struct numeric_list { constexpr static std::size_t count = sizeof...(nn); }; + +template +struct numeric_list { static const std::size_t count = sizeof...(nn) + 1; const static T first_value = n; }; + +#ifndef EIGEN_PARSED_BY_DOXYGEN +/* numeric list constructors + * + * equivalencies: + * constructor result + * typename gen_numeric_list::type numeric_list + * typename gen_numeric_list_reversed::type numeric_list + * typename gen_numeric_list_swapped_pair::type numeric_list + * typename gen_numeric_list_repeated::type numeric_list + */ + +template struct gen_numeric_list : gen_numeric_list {}; +template struct gen_numeric_list { typedef numeric_list type; }; + +template struct gen_numeric_list_reversed : gen_numeric_list_reversed {}; +template struct gen_numeric_list_reversed { typedef numeric_list type; }; + +template struct gen_numeric_list_swapped_pair : gen_numeric_list_swapped_pair {}; +template struct gen_numeric_list_swapped_pair { typedef numeric_list type; }; + +template struct gen_numeric_list_repeated : gen_numeric_list_repeated {}; +template struct gen_numeric_list_repeated { typedef numeric_list type; }; + +/* list manipulation: concatenate */ + +template struct concat; + +template struct concat, type_list> { typedef type_list type; }; +template struct concat, numeric_list > { typedef numeric_list type; }; + +template struct mconcat; +template struct mconcat
{ typedef a type; }; +template struct mconcat : concat {}; +template struct mconcat : concat::type> {}; + +/* list manipulation: extract slices */ + +template struct take; +template struct take> : concat, typename take>::type> {}; +template struct take> { typedef type_list<> type; }; +template struct take<0, type_list> { typedef type_list<> type; }; +template<> struct take<0, type_list<>> { typedef type_list<> type; }; + +template struct take> : concat, typename take>::type> {}; +template struct take> { typedef numeric_list type; }; +template struct take<0, numeric_list> { typedef numeric_list type; }; +template struct take<0, numeric_list> { typedef numeric_list type; }; + +template struct h_skip_helper_numeric; +template struct h_skip_helper_numeric : h_skip_helper_numeric {}; +template struct h_skip_helper_numeric { typedef numeric_list type; }; +template struct h_skip_helper_numeric { typedef numeric_list type; }; +template struct h_skip_helper_numeric { typedef numeric_list type; }; + +template struct h_skip_helper_type; +template struct h_skip_helper_type : h_skip_helper_type {}; +template struct h_skip_helper_type<0, t, tt...> { typedef type_list type; }; +template struct h_skip_helper_type { typedef type_list<> type; }; +template<> struct h_skip_helper_type<0> { typedef type_list<> type; }; +#endif //not EIGEN_PARSED_BY_DOXYGEN + +template +struct h_skip { + template + constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_numeric::type helper(numeric_list) { return typename h_skip_helper_numeric::type(); } + template + constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_type::type helper(type_list) { return typename h_skip_helper_type::type(); } +}; + +template struct skip { typedef decltype(h_skip::helper(a())) type; }; + +template struct slice : take::type> {}; + +/* list manipulation: retrieve single element from list */ + +template struct get; + +template struct get> : get> {}; +template struct get<0, type_list> { typedef a type; }; + +template struct get> : get> {}; +template struct get<0, numeric_list> { constexpr static T value = a; }; + +template constexpr T array_get(const numeric_list&) { + return get<(int)n, numeric_list>::value; +} + +/* always get type, regardless of dummy; good for parameter pack expansion */ + +template struct id_numeric { typedef t type; }; +template struct id_type { typedef t type; }; + +/* equality checking, flagged version */ + +template struct is_same_gf : is_same { constexpr static int global_flags = 0; }; + +/* apply_op to list */ + +template< + bool from_left, // false + template class op, + typename additional_param, + typename... values +> +struct h_apply_op_helper { typedef type_list::type...> type; }; +template< + template class op, + typename additional_param, + typename... 
values +> +struct h_apply_op_helper { typedef type_list::type...> type; }; + +template< + bool from_left, + template class op, + typename additional_param +> +struct h_apply_op +{ + template + constexpr static typename h_apply_op_helper::type helper(type_list) + { return typename h_apply_op_helper::type(); } +}; + +template< + template class op, + typename additional_param, + typename a +> +struct apply_op_from_left { typedef decltype(h_apply_op::helper(a())) type; }; + +template< + template class op, + typename additional_param, + typename a +> +struct apply_op_from_right { typedef decltype(h_apply_op::helper(a())) type; }; + +/* see if an element is in a list */ + +template< + template class test, + typename check_against, + typename h_list, + bool last_check_positive = false +> +struct contained_in_list; + +template< + template class test, + typename check_against, + typename h_list +> +struct contained_in_list +{ + constexpr static bool value = true; +}; + +template< + template class test, + typename check_against, + typename a, + typename... as +> +struct contained_in_list, false> : contained_in_list, test::value> {}; + +template< + template class test, + typename check_against + EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty) +> +struct contained_in_list, false> { constexpr static bool value = false; }; + +/* see if an element is in a list and check for global flags */ + +template< + template class test, + typename check_against, + typename h_list, + int default_flags = 0, + bool last_check_positive = false, + int last_check_flags = default_flags +> +struct contained_in_list_gf; + +template< + template class test, + typename check_against, + typename h_list, + int default_flags, + int last_check_flags +> +struct contained_in_list_gf +{ + constexpr static bool value = true; + constexpr static int global_flags = last_check_flags; +}; + +template< + template class test, + typename check_against, + typename a, + typename... as, + int default_flags, + int last_check_flags +> +struct contained_in_list_gf, default_flags, false, last_check_flags> : contained_in_list_gf, default_flags, test::value, test::global_flags> {}; + +template< + template class test, + typename check_against + EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), + int default_flags, + int last_check_flags +> +struct contained_in_list_gf, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; }; + +/* generic reductions */ + +template< + typename Reducer, + typename... Ts +> struct reduce; + +template< + typename Reducer +> struct reduce +{ + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE int run() { return Reducer::Identity; } +}; + +template< + typename Reducer, + typename A +> struct reduce +{ + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE A run(A a) { return a; } +}; + +template< + typename Reducer, + typename A, + typename... Ts +> struct reduce +{ + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, Ts... 
ts) -> decltype(Reducer::run(a, reduce::run(ts...))) { + return Reducer::run(a, reduce::run(ts...)); + } +}; + +/* generic binary operations */ + +struct sum_op { + template EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a + b) { return a + b; } + static constexpr int Identity = 0; +}; +struct product_op { + template EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a * b) { return a * b; } + static constexpr int Identity = 1; +}; + +struct logical_and_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a && b) { return a && b; } }; +struct logical_or_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a || b) { return a || b; } }; + +struct equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a == b) { return a == b; } }; +struct not_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a != b) { return a != b; } }; +struct lesser_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a < b) { return a < b; } }; +struct lesser_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a <= b) { return a <= b; } }; +struct greater_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a > b) { return a > b; } }; +struct greater_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a >= b) { return a >= b; } }; + +/* generic unary operations */ + +struct not_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(!a) { return !a; } }; +struct negation_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(-a) { return -a; } }; +struct greater_equal_zero_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(a >= 0) { return a >= 0; } }; + + +/* reductions for lists */ + +// using auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it +// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1 +// does... +template +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE decltype(reduce::run((*((Ts*)0))...)) arg_prod(Ts... ts) +{ + return reduce::run(ts...); +} + +template +constexpr EIGEN_STRONG_INLINE decltype(reduce::run((*((Ts*)0))...)) arg_sum(Ts... 
ts) +{ + return reduce::run(ts...); +} + +/* reverse arrays */ + +template +constexpr EIGEN_STRONG_INLINE Array h_array_reverse(Array arr, numeric_list) +{ + return {{array_get(arr)...}}; +} + +template +constexpr EIGEN_STRONG_INLINE array array_reverse(array arr) +{ + return h_array_reverse(arr, typename gen_numeric_list::type()); +} + + +/* generic array reductions */ + +// can't reuse standard reduce() interface above because Intel's Compiler +// *really* doesn't like it, so we just reimplement the stuff +// (start from N - 1 and work down to 0 because specialization for +// n == N - 1 also doesn't work in Intel's compiler, so it goes into +// an infinite loop) +template +struct h_array_reduce { + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) + { + return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); + } +}; + +template +struct h_array_reduce +{ + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array& arr, T) + { + return array_get<0>(arr); + } +}; + +template +struct h_array_reduce +{ + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array&, T identity) + { + return identity; + } +}; + +template +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) +{ + return h_array_reduce::run(arr, identity); +} + +/* standard array reductions */ + +template +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) +{ + return array_reduce(arr, static_cast(0)); +} + +template +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) +{ + return array_reduce(arr, static_cast(1)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { + eigen_assert(a.size() > 0); + t prod = 1; + for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } + return prod; +} + +/* zip an array */ + +template +constexpr EIGEN_STRONG_INLINE array h_array_zip(array a, array b, numeric_list) +{ + return array{{ Op::run(array_get(a), array_get(b))... }}; +} + +template +constexpr EIGEN_STRONG_INLINE array array_zip(array a, array b) +{ + return h_array_zip(a, b, typename gen_numeric_list::type()); +} + +/* zip an array and reduce the result */ + +template +constexpr EIGEN_STRONG_INLINE auto h_array_zip_and_reduce(array a, array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) +{ + return reduce::type...>::run(Op::run(array_get(a), array_get(b))...); +} + +template +constexpr EIGEN_STRONG_INLINE auto array_zip_and_reduce(array a, array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) +{ + return h_array_zip_and_reduce(a, b, typename gen_numeric_list::type()); +} + +/* apply stuff to an array */ + +template +constexpr EIGEN_STRONG_INLINE array h_array_apply(array a, numeric_list) +{ + return array{{ Op::run(array_get(a))... 
}}; +} + +template +constexpr EIGEN_STRONG_INLINE array array_apply(array a) +{ + return h_array_apply(a, typename gen_numeric_list::type()); +} + +/* apply stuff to an array and reduce */ + +template +constexpr EIGEN_STRONG_INLINE auto h_array_apply_and_reduce(array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) +{ + return reduce::type...>::run(Op::run(array_get(arr))...); +} + +template +constexpr EIGEN_STRONG_INLINE auto array_apply_and_reduce(array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) +{ + return h_array_apply_and_reduce(a, typename gen_numeric_list::type()); +} + +/* repeat a value n times (and make an array out of it + * usage: + * array = repeat<16>(42); + */ + +template +struct h_repeat +{ + template + constexpr static EIGEN_STRONG_INLINE array run(t v, numeric_list) + { + return {{ typename id_numeric::type(v)... }}; + } +}; + +template +constexpr array repeat(t v) { return h_repeat::run(v, typename gen_numeric_list::type()); } + +/* instantiate a class by a C-style array */ +template +struct h_instantiate_by_c_array; + +template +struct h_instantiate_by_c_array +{ + static InstType run(ArrType* arr, Ps... args) + { + return h_instantiate_by_c_array::run(arr + 1, args..., arr[0]); + } +}; + +template +struct h_instantiate_by_c_array +{ + static InstType run(ArrType* arr, Ps... args) + { + return h_instantiate_by_c_array::run(arr + 1, arr[0], args...); + } +}; + +template +struct h_instantiate_by_c_array +{ + static InstType run(ArrType* arr, Ps... args) + { + (void)arr; + return InstType(args...); + } +}; + +template +struct h_instantiate_by_c_array +{ + static InstType run(ArrType* arr, Ps... args) + { + (void)arr; + return InstType(args...); + } +}; + +template +InstType instantiate_by_c_array(ArrType* arr) +{ + return h_instantiate_by_c_array::run(arr); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11META_H diff --git a/external/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h b/external/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h new file mode 100644 index 0000000..056736c --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h @@ -0,0 +1,88 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11WORKAROUNDS_H +#define EIGEN_CXX11WORKAROUNDS_H + +/* COMPATIBILITY CHECKS + * (so users of compilers that are too old get some realistic error messages) + */ +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310) +#error Intel Compiler only supports required C++ features since version 13.1. +// note that most stuff in principle works with 13.0 but when combining +// some features, at some point 13.0 will just fail with an internal assertion +#elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) +// G++ < 4.6 by default will continue processing the source files - even if we use #error to make +// it error out. For this reason, we use the pragma to make sure G++ aborts at the first error +// it sees. 
Unfortunately, that is still not our #error directive, but at least the output is +// short enough the user has a chance to see that the compiler version is not sufficient for +// the funky template mojo we use. +#pragma GCC diagnostic error "-Wfatal-errors" +#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6. +#endif + +/* Check that the compiler at least claims to support C++11. It might not be sufficient + * because the compiler may not implement it correctly, but at least we'll know. + * On the other hand, visual studio still doesn't claim to support C++11 although it's + * compliant enugh for our purpose. + */ +#if (EIGEN_COMP_CXXVER < 11) +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic error "-Wfatal-errors" +#endif +#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.) +#endif + +namespace Eigen { + +namespace internal { + +/* std::get is only constexpr in C++14, not yet in C++11 + */ + + +template constexpr inline T& array_get(std::vector& a) { return a[I_]; } +template constexpr inline T&& array_get(std::vector&& a) { return a[I_]; } +template constexpr inline T const& array_get(std::vector const& a) { return a[I_]; } + +/* Suppose you have a template of the form + * template struct X; + * And you want to specialize it in such a way: + * template struct X> { ::: }; + * template<> struct X> { ::: }; + * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since + * g++ can only match templates called with parameter packs if the number of template + * arguments is not a fixed size (so inside the first specialization, referencing + * X> will fail in g++). On the other hand, g++ will accept the following: + * template struct X> { ::: }: + * as an additional (!) specialization, which will then only match the empty case. + * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax, + * so we have to create a workaround for this. + */ +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) mt... n +#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) +#define EIGEN_TPL_PP_SPEC_HACK_USE(n) n... +#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) , n... +#else +#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) +#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) +#define EIGEN_TPL_PP_SPEC_HACK_USE(n) +#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11WORKAROUNDS_H + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/external/unsupported/Eigen/CXX11/src/util/EmulateArray.h new file mode 100644 index 0000000..834b20b --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/util/EmulateArray.h @@ -0,0 +1,261 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EMULATE_ARRAY_H +#define EIGEN_EMULATE_ARRAY_H + + + +// The array class is only available starting with cxx11. Emulate our own here +// if needed. 
Beware, msvc still doesn't advertise itself as a c++11 compiler! +// Moreover, CUDA doesn't support the STL containers, so we use our own instead. +#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(EIGEN_GPUCC) || defined(EIGEN_AVOID_STL_ARRAY) + +namespace Eigen { +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t index) { eigen_internal_assert(index < size()); return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { eigen_internal_assert(index < size()); return values[index]; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& front() { return values[0]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& front() const { return values[0]; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& back() { return values[n-1]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + static std::size_t size() { return n; } + + T values[n]; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v) { + EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { + EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { + EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, + const T& v4) { + EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5) { + EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6) { + EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7) { + EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array( + const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7, const T& v8) { + EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + values[7] = v8; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(std::initializer_list l) { + eigen_assert(l.size() == n); + internal::smart_copy(l.begin(), 
l.end(), values); + } +#endif +}; + + +// Specialize array for zero size +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t) { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t) const { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& front() { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& front() const { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& back() { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& back() const { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() : dummy() { } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC array(std::initializer_list l) : dummy() { + EIGEN_UNUSED_VARIABLE(l); + eigen_assert(l.size() == 0); + } +#endif + + private: + T dummy; +}; + +// Comparison operator +// Todo: implement !=, <, <=, >, and >= +template +EIGEN_DEVICE_FUNC bool operator==(const array& lhs, const array& rhs) { + for (std::size_t i = 0; i < N; ++i) { + if (lhs[i] != rhs[i]) { + return false; + } + } + return true; +} + + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { + return a[I_]; +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { + return a[I_]; +} + +template struct array_size > { + enum { value = N }; +}; +template struct array_size& > { + enum { value = N }; +}; +template struct array_size > { + enum { value = N }; +}; +template struct array_size& > { + enum { value = N }; +}; + +} // end namespace internal +} // end namespace Eigen + +#else + +// The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array +#include +namespace Eigen { + +template using array = std::array; + +namespace internal { +/* std::get is only constexpr in C++14, not yet in C++11 + * - libstdc++ from version 4.7 onwards has it nevertheless, + * so use that + * - libstdc++ older versions: use _M_instance directly + * - libc++ all versions so far: use __elems_ directly + * - all other libs: use std::get to be portable, but + * this may not be constexpr + */ +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 +#define STD_GET_ARR_HACK a._M_instance[I_] +#elif defined(_LIBCPP_VERSION) +#define STD_GET_ARR_HACK a.__elems_[I_] +#else +#define STD_GET_ARR_HACK std::template get(a) +#endif + +template constexpr inline T& array_get(std::array& a) { return (T&) STD_GET_ARR_HACK; } +template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } +template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; } + +#undef STD_GET_ARR_HACK + +} // end namespace internal +} // end namespace Eigen + +#endif + +#endif // EIGEN_EMULATE_ARRAY_H diff --git a/external/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/external/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h new file mode 100644 index 0000000..277ab14 --- /dev/null +++ b/external/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h @@ -0,0 +1,158 @@ +// This 
file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FIXEDSIZEVECTOR_H +#define EIGEN_FIXEDSIZEVECTOR_H + +namespace Eigen { + +/** \class MaxSizeVector + * \ingroup Core + * + * \brief The MaxSizeVector class. + * + * The %MaxSizeVector provides a subset of std::vector functionality. + * + * The goal is to provide basic std::vector operations when using + * std::vector is not an option (e.g. on GPU or when compiling using + * FMA/AVX, as this can cause either compilation failures or illegal + * instruction failures). + * + * Beware: The constructors are not API compatible with these of + * std::vector. + */ +template +class MaxSizeVector { + static const size_t alignment = EIGEN_PLAIN_ENUM_MAX(EIGEN_ALIGNOF(T), sizeof(void*)); + public: + // Construct a new MaxSizeVector, reserve n elements. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit MaxSizeVector(size_t n) + : reserve_(n), size_(0), + data_(static_cast(internal::handmade_aligned_malloc(n * sizeof(T), alignment))) { + } + + // Construct a new MaxSizeVector, reserve and resize to n. + // Copy the init value to all elements. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + MaxSizeVector(size_t n, const T& init) + : reserve_(n), size_(n), + data_(static_cast(internal::handmade_aligned_malloc(n * sizeof(T), alignment))) { + size_t i = 0; + EIGEN_TRY + { + for(; i < size_; ++i) { new (&data_[i]) T(init); } + } + EIGEN_CATCH(...) + { + // Construction failed, destruct in reverse order: + for(; (i+1) > 0; --i) { data_[i-1].~T(); } + internal::handmade_aligned_free(data_); + EIGEN_THROW; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ~MaxSizeVector() { + for (size_t i = size_; i > 0; --i) { + data_[i-1].~T(); + } + internal::handmade_aligned_free(data_); + } + + void resize(size_t n) { + eigen_assert(n <= reserve_); + for (; size_ < n; ++size_) { + new (&data_[size_]) T; + } + for (; size_ > n; --size_) { + data_[size_-1].~T(); + } + eigen_assert(size_ == n); + } + + // Append new elements (up to reserved size). 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void push_back(const T& t) { + eigen_assert(size_ < reserve_); + new (&data_[size_++]) T(t); + } + + // For C++03 compatibility this only takes one argument + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void emplace_back(const X& x) { + eigen_assert(size_ < reserve_); + new (&data_[size_++]) T(x); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T& operator[] (size_t i) const { + eigen_assert(i < size_); + return data_[i]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T& operator[] (size_t i) { + eigen_assert(i < size_); + return data_[i]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T& back() { + eigen_assert(size_ > 0); + return data_[size_ - 1]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T& back() const { + eigen_assert(size_ > 0); + return data_[size_ - 1]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void pop_back() { + eigen_assert(size_ > 0); + data_[--size_].~T(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t size() const { return size_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool empty() const { return size_ == 0; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* data() { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* data() const { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* begin() { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* end() { return data_ + size_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* begin() const { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* end() const { return data_ + size_; } + + private: + size_t reserve_; + size_t size_; + T* data_; +}; + +} // namespace Eigen + +#endif // EIGEN_FIXEDSIZEVECTOR_H diff --git a/external/unsupported/Eigen/EulerAngles b/external/unsupported/Eigen/EulerAngles new file mode 100644 index 0000000..f8f1c5d --- /dev/null +++ b/external/unsupported/Eigen/EulerAngles @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERANGLES_MODULE_H +#define EIGEN_EULERANGLES_MODULE_H + + +#include "../../Eigen/Core" +#include "../../Eigen/Geometry" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +namespace Eigen { + +/** + * \defgroup EulerAngles_Module EulerAngles module + * \brief This module provides generic euler angles rotation. + * + * Euler angles are a way to represent 3D rotation. + * + * In order to use this module in your code, include this header: + * \code + * #include + * \endcode + * + * See \ref EulerAngles for more information. + * + */ + +} + +#include "src/EulerAngles/EulerSystem.h" +#include "src/EulerAngles/EulerAngles.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EULERANGLES_MODULE_H diff --git a/external/unsupported/Eigen/FFT b/external/unsupported/Eigen/FFT new file mode 100644 index 0000000..c8c311a --- /dev/null +++ b/external/unsupported/Eigen/FFT @@ -0,0 +1,419 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FFT_H +#define EIGEN_FFT_H + +#include +#include +#include +#include "../../Eigen/Core" + + +/** + * \defgroup FFT_Module Fast Fourier Transform module + * + * \code + * #include + * \endcode + * + * This module provides Fast Fourier transformation, with a configurable backend + * implementation. + * + * The default implementation is based on kissfft. It is a small, free, and + * reasonably efficient default. + * + * There are currently two implementation backend: + * + * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size. + * - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form. + * + * \section FFTDesign Design + * + * The following design decisions were made concerning scaling and + * half-spectrum for real FFT. + * + * The intent is to facilitate generic programming and ease migrating code + * from Matlab/octave. + * We think the default behavior of Eigen/FFT should favor correctness and + * generality over speed. Of course, the caller should be able to "opt-out" from this + * behavior and get the speed increase if they want it. + * + * 1) %Scaling: + * Other libraries (FFTW,IMKL,KISSFFT) do not perform scaling, so there + * is a constant gain incurred after the forward&inverse transforms , so + * IFFT(FFT(x)) = Kx; this is done to avoid a vector-by-value multiply. + * The downside is that algorithms that worked correctly in Matlab/octave + * don't behave the same way once implemented in C++. + * + * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x. + * + * 2) Real FFT half-spectrum + * Other libraries use only half the frequency spectrum (plus one extra + * sample for the Nyquist bin) for a real FFT, the other half is the + * conjugate-symmetric of the first half. This saves them a copy and some + * memory. The downside is the caller needs to have special logic for the + * number of bins in complex vs real. + * + * How Eigen/FFT differs: The full spectrum is returned from the forward + * transform. This facilitates generic template programming by obviating + * separate specializations for real vs complex. On the inverse + * transform, only half the spectrum is actually used if the output type is real. 
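+ *
+ * To make the two points above concrete, here is a minimal usage sketch
+ * (added for illustration only; it assumes the default kissfft backend, the
+ * default flags, and the usual unsupported include path):
+ * \code
+ * #include <unsupported/Eigen/FFT>
+ * #include <vector>
+ * #include <complex>
+ *
+ * int main()
+ * {
+ *   Eigen::FFT<float> fft;
+ *   std::vector<float> timevec(8, 1.f);            // real-valued input
+ *   std::vector<std::complex<float> > freqvec;
+ *   fft.fwd(freqvec, timevec);   // full spectrum is returned, even for real input
+ *   fft.inv(timevec, freqvec);   // scaled so that IFFT(FFT(x)) == x
+ *   return 0;
+ * }
+ * \endcode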
+ */ + + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#ifdef EIGEN_FFTW_DEFAULT +// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size +# include +# include "src/FFT/ei_fftw_impl.h" + namespace Eigen { + //template typedef struct internal::fftw_impl default_fft_impl; this does not work + template struct default_fft_impl : public internal::fftw_impl {}; + } +#elif defined EIGEN_MKL_DEFAULT +// TODO +// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form +# include "src/FFT/ei_imklfft_impl.h" + namespace Eigen { + template struct default_fft_impl : public internal::imklfft_impl {}; + } +#else +// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft +// +# include "src/FFT/ei_kissfft_impl.h" + namespace Eigen { + template + struct default_fft_impl : public internal::kissfft_impl {}; + } +#endif + +namespace Eigen { + + +// +template struct fft_fwd_proxy; +template struct fft_inv_proxy; + +namespace internal { +template +struct traits< fft_fwd_proxy > +{ + typedef typename T_SrcMat::PlainObject ReturnType; +}; +template +struct traits< fft_inv_proxy > +{ + typedef typename T_SrcMat::PlainObject ReturnType; +}; +} + +template +struct fft_fwd_proxy + : public ReturnByValue > +{ + typedef DenseIndex Index; + + fft_fwd_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} + + template void evalTo(T_DestMat& dst) const; + + Index rows() const { return m_src.rows(); } + Index cols() const { return m_src.cols(); } +protected: + const T_SrcMat & m_src; + T_FftIfc & m_ifc; + Index m_nfft; +}; + +template +struct fft_inv_proxy + : public ReturnByValue > +{ + typedef DenseIndex Index; + + fft_inv_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} + + template void evalTo(T_DestMat& dst) const; + + Index rows() const { return m_src.rows(); } + Index cols() const { return m_src.cols(); } +protected: + const T_SrcMat & m_src; + T_FftIfc & m_ifc; + Index m_nfft; +}; + + +template > +class FFT +{ + public: + typedef T_Impl impl_type; + typedef DenseIndex Index; + typedef typename impl_type::Scalar Scalar; + typedef typename impl_type::Complex Complex; + + enum Flag { + Default=0, // goof proof + Unscaled=1, + HalfSpectrum=2, + // SomeOtherSpeedOptimization=4 + Speedy=32767 + }; + + FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { } + + inline + bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;} + + inline + void SetFlag(Flag f) { m_flag |= (int)f;} + + inline + void ClearFlag(Flag f) { m_flag &= (~(int)f);} + + inline + void fwd( Complex * dst, const Scalar * src, Index nfft) + { + m_impl.fwd(dst,src,static_cast(nfft)); + if ( HasFlag(HalfSpectrum) == false) + ReflectSpectrum(dst,nfft); + } + + inline + void fwd( Complex * dst, const Complex * src, Index nfft) + { + m_impl.fwd(dst,src,static_cast(nfft)); + } + + /* + inline + void fwd2(Complex * dst, const Complex * src, int n0,int n1) + { + m_impl.fwd2(dst,src,n0,n1); + } + */ + + template + inline + void fwd( std::vector & dst, const std::vector<_Input> & src) + { + if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.resize( (src.size()>>1)+1); // half the bins + Nyquist bin + else + dst.resize(src.size()); + fwd(&dst[0],&src[0],src.size()); + } + + template + inline + void fwd( MatrixBase & dst, const MatrixBase & src, Index nfft=-1) + { + typedef typename ComplexDerived::Scalar 
dst_type; + typedef typename InputDerived::Scalar src_type; + EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + + if (nfft<1) + nfft = src.size(); + + if ( NumTraits< src_type >::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.derived().resize( (nfft>>1)+1); + else + dst.derived().resize(nfft); + + if ( src.innerStride() != 1 || src.size() < nfft ) { + Matrix tmp; + if (src.size() + inline + fft_fwd_proxy< MatrixBase, FFT > + fwd( const MatrixBase & src, Index nfft=-1) + { + return fft_fwd_proxy< MatrixBase ,FFT >( src, *this,nfft ); + } + + template + inline + fft_inv_proxy< MatrixBase, FFT > + inv( const MatrixBase & src, Index nfft=-1) + { + return fft_inv_proxy< MatrixBase ,FFT >( src, *this,nfft ); + } + + inline + void inv( Complex * dst, const Complex * src, Index nfft) + { + m_impl.inv( dst,src,static_cast(nfft) ); + if ( HasFlag( Unscaled ) == false) + scale(dst,Scalar(1./nfft),nfft); // scale the time series + } + + inline + void inv( Scalar * dst, const Complex * src, Index nfft) + { + m_impl.inv( dst,src,static_cast(nfft) ); + if ( HasFlag( Unscaled ) == false) + scale(dst,Scalar(1./nfft),nfft); // scale the time series + } + + template + inline + void inv( MatrixBase & dst, const MatrixBase & src, Index nfft=-1) + { + typedef typename ComplexDerived::Scalar src_type; + typedef typename ComplexDerived::RealScalar real_type; + typedef typename OutputDerived::Scalar dst_type; + const bool realfft= (NumTraits::IsComplex == 0); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + + if (nfft<1) { //automatic FFT size determination + if ( realfft && HasFlag(HalfSpectrum) ) + nfft = 2*(src.size()-1); //assume even fft size + else + nfft = src.size(); + } + dst.derived().resize( nfft ); + + // check for nfft that does not fit the input data size + Index resize_input= ( realfft && HasFlag(HalfSpectrum) ) + ? 
( (nfft/2+1) - src.size() ) + : ( nfft - src.size() ); + + if ( src.innerStride() != 1 || resize_input ) { + // if the vector is strided, then we need to copy it to a packed temporary + Matrix tmp; + if ( resize_input ) { + size_t ncopy = (std::min)(src.size(),src.size() + resize_input); + tmp.setZero(src.size() + resize_input); + if ( realfft && HasFlag(HalfSpectrum) ) { + // pad at the Nyquist bin + tmp.head(ncopy) = src.head(ncopy); + tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin + }else{ + size_t nhead,ntail; + nhead = 1+ncopy/2-1; // range [0:pi) + ntail = ncopy/2-1; // range (-pi:0) + tmp.head(nhead) = src.head(nhead); + tmp.tail(ntail) = src.tail(ntail); + if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it + tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5); + }else{ // expanding -- split the old Nyquist bin into two halves + tmp(nhead) = src(nhead) * real_type(.5); + tmp(tmp.size()-nhead) = tmp(nhead); + } + } + }else{ + tmp = src; + } + inv( &dst[0],&tmp[0], nfft); + }else{ + inv( &dst[0],&src[0], nfft); + } + } + + template + inline + void inv( std::vector<_Output> & dst, const std::vector & src,Index nfft=-1) + { + if (nfft<1) + nfft = ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size(); + dst.resize( nfft ); + inv( &dst[0],&src[0],nfft); + } + + + /* + // TODO: multi-dimensional FFTs + inline + void inv2(Complex * dst, const Complex * src, int n0,int n1) + { + m_impl.inv2(dst,src,n0,n1); + if ( HasFlag( Unscaled ) == false) + scale(dst,1./(n0*n1),n0*n1); + } + */ + + inline + impl_type & impl() {return m_impl;} + private: + + template + inline + void scale(T_Data * x,Scalar s,Index nx) + { +#if 1 + for (int k=0;k::Map(x,nx) *= s; + else + Matrix::MapAligned(x,nx) *= s; + //Matrix::Map(x,nx) * s; +#endif + } + + inline + void ReflectSpectrum(Complex * freq, Index nfft) + { + // create the implicit right-half spectrum (conjugate-mirror of the left-half) + Index nhbins=(nfft>>1)+1; + for (Index k=nhbins;k < nfft; ++k ) + freq[k] = conj(freq[nfft-k]); + } + + impl_type m_impl; + int m_flag; +}; + +template +template inline +void fft_fwd_proxy::evalTo(T_DestMat& dst) const +{ + m_ifc.fwd( dst, m_src, m_nfft); +} + +template +template inline +void fft_inv_proxy::evalTo(T_DestMat& dst) const +{ + m_ifc.inv( dst, m_src, m_nfft); +} + +} + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/external/unsupported/Eigen/IterativeSolvers b/external/unsupported/Eigen/IterativeSolvers new file mode 100644 index 0000000..a3f58d6 --- /dev/null +++ b/external/unsupported/Eigen/IterativeSolvers @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ITERATIVE_SOLVERS_MODULE_H +#define EIGEN_ITERATIVE_SOLVERS_MODULE_H + +#include "../../Eigen/Sparse" +#include "../../Eigen/Jacobi" +#include "../../Eigen/Householder" + + +/** + * \defgroup IterativeLinearSolvers_Module Iterative solvers module + * This module aims to provide various iterative linear and non linear solver algorithms. 
+ * It currently provides: + * - a constrained conjugate gradient + * - a Householder GMRES implementation + * - an IDR(s) implementation + * - a DGMRES implementation + * - a MINRES implementation + * + * \code + * #include + * \endcode + */ + + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#ifndef EIGEN_MPL2_ONLY +#include "src/IterativeSolvers/IterationController.h" +#include "src/IterativeSolvers/ConstrainedConjGrad.h" +#endif + +#include "src/IterativeSolvers/IncompleteLU.h" +#include "src/IterativeSolvers/GMRES.h" +#include "src/IterativeSolvers/DGMRES.h" +//#include "src/IterativeSolvers/SSORPreconditioner.h" +#include "src/IterativeSolvers/MINRES.h" +#include "src/IterativeSolvers/IDRS.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + + +#endif // EIGEN_ITERATIVE_SOLVERS_MODULE_H diff --git a/external/unsupported/Eigen/KroneckerProduct b/external/unsupported/Eigen/KroneckerProduct new file mode 100644 index 0000000..5f5afb8 --- /dev/null +++ b/external/unsupported/Eigen/KroneckerProduct @@ -0,0 +1,36 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_KRONECKER_PRODUCT_MODULE_H +#define EIGEN_KRONECKER_PRODUCT_MODULE_H + +#include "../../Eigen/Core" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "../../Eigen/src/SparseCore/SparseUtil.h" + +namespace Eigen { + +/** + * \defgroup KroneckerProduct_Module KroneckerProduct module + * + * This module contains an experimental Kronecker product implementation. + * + * \code + * #include + * \endcode + */ + +} // namespace Eigen + +#include "src/KroneckerProduct/KroneckerTensorProduct.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_KRONECKER_PRODUCT_MODULE_H diff --git a/external/unsupported/Eigen/LevenbergMarquardt b/external/unsupported/Eigen/LevenbergMarquardt new file mode 100644 index 0000000..1090505 --- /dev/null +++ b/external/unsupported/Eigen/LevenbergMarquardt @@ -0,0 +1,49 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE +#define EIGEN_LEVENBERGMARQUARDT_MODULE + +// #include + +#include "../../Eigen/Core" +#include "../../Eigen/Jacobi" +#include "../../Eigen/QR" +#include "NumericalDiff" + +#include "../../Eigen/SparseQR" + +/** + * \defgroup LevenbergMarquardt_Module Levenberg-Marquardt module + * + * \code + * #include + * \endcode + * + * + */ + +#include "../../Eigen/SparseCore" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +#include "src/LevenbergMarquardt/LMqrsolv.h" +#include "src/LevenbergMarquardt/LMcovar.h" +#include "src/LevenbergMarquardt/LMpar.h" + +#endif + +#include "src/LevenbergMarquardt/LevenbergMarquardt.h" +#include "src/LevenbergMarquardt/LMonestep.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_LEVENBERGMARQUARDT_MODULE diff --git a/external/unsupported/Eigen/MPRealSupport b/external/unsupported/Eigen/MPRealSupport new file mode 100644 index 0000000..c4ea4ec --- /dev/null +++ b/external/unsupported/Eigen/MPRealSupport @@ -0,0 +1,213 @@ +// This file is part of a joint effort between Eigen, a lightweight C++ template library +// for linear algebra, and MPFR C++, a C++ interface to MPFR library (http://www.holoborodko.com/pavel/) +// +// Copyright (C) 2010-2012 Pavel Holoborodko +// Copyright (C) 2010 Konstantin Holoborodko +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MPREALSUPPORT_MODULE_H +#define EIGEN_MPREALSUPPORT_MODULE_H + +#include "../../Eigen/Core" +#include + +namespace Eigen { + +/** + * \defgroup MPRealSupport_Module MPFRC++ Support module + * \code + * #include + * \endcode + * + * This module provides support for multi precision floating point numbers + * via the MPFR C++ + * library which itself is built upon MPFR/GMP. + * + * \warning MPFR C++ is licensed under the GPL. + * + * You can find a copy of MPFR C++ that is known to be compatible in the unsupported/test/mpreal folder. 
+ * + * Here is an example: + * +\code +#include +#include +#include +using namespace mpfr; +using namespace Eigen; +int main() +{ + // set precision to 256 bits (double has only 53 bits) + mpreal::set_default_prec(256); + // Declare matrix and vector types with multi-precision scalar type + typedef Matrix MatrixXmp; + typedef Matrix VectorXmp; + + MatrixXmp A = MatrixXmp::Random(100,100); + VectorXmp b = VectorXmp::Random(100); + + // Solve Ax=b using LU + VectorXmp x = A.lu().solve(b); + std::cout << "relative error: " << (A*x - b).norm() / b.norm() << std::endl; + return 0; +} +\endcode + * + */ + + template<> struct NumTraits + : GenericNumTraits + { + enum { + IsInteger = 0, + IsSigned = 1, + IsComplex = 0, + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + + typedef mpfr::mpreal Real; + typedef mpfr::mpreal NonInteger; + + static inline Real highest (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(Precision); } + static inline Real lowest (long Precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(Precision); } + + // Constants + static inline Real Pi (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_pi(Precision); } + static inline Real Euler (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_euler(Precision); } + static inline Real Log2 (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_log2(Precision); } + static inline Real Catalan (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_catalan(Precision); } + + static inline Real epsilon (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(Precision); } + static inline Real epsilon (const Real& x) { return mpfr::machine_epsilon(x); } + +#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS + static inline int digits10 (long Precision = mpfr::mpreal::get_default_prec()) { return std::numeric_limits::digits10(Precision); } + static inline int digits10 (const Real& x) { return std::numeric_limits::digits10(x); } + + static inline int digits () { return std::numeric_limits::digits(); } + static inline int digits (const Real& x) { return std::numeric_limits::digits(x); } +#endif + + static inline Real dummy_precision() + { + mpfr_prec_t weak_prec = ((mpfr::mpreal::get_default_prec()-1) * 90) / 100; + return mpfr::machine_epsilon(weak_prec); + } + }; + + namespace internal { + + template<> inline mpfr::mpreal random() + { + return mpfr::random(); + } + + template<> inline mpfr::mpreal random(const mpfr::mpreal& a, const mpfr::mpreal& b) + { + return a + (b-a) * random(); + } + + inline bool isMuchSmallerThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) + { + return mpfr::abs(a) <= mpfr::abs(b) * eps; + } + + inline bool isApprox(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) + { + return mpfr::isEqualFuzzy(a,b,eps); + } + + inline bool isApproxOrLessThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) + { + return a <= b || mpfr::isEqualFuzzy(a,b,eps); + } + + template<> inline long double cast(const mpfr::mpreal& x) + { return x.toLDouble(); } + + template<> inline double cast(const mpfr::mpreal& x) + { return x.toDouble(); } + + template<> inline long cast(const mpfr::mpreal& x) + { return x.toLong(); } + + template<> inline int cast(const mpfr::mpreal& x) + { return int(x.toLong()); } + + // Specialize GEBP kernel and traits for mpreal (no need for peeling, nor 
complicated stuff) + // This also permits to directly call mpfr's routines and avoid many temporaries produced by mpreal + template<> + class gebp_traits + { + public: + typedef mpfr::mpreal ResScalar; + enum { + Vectorizable = false, + LhsPacketSize = 1, + RhsPacketSize = 1, + ResPacketSize = 1, + NumberOfRegisters = 1, + nr = 1, + mr = 1, + LhsProgress = 1, + RhsProgress = 1 + }; + typedef ResScalar LhsPacket; + typedef ResScalar RhsPacket; + typedef ResScalar ResPacket; + typedef LhsPacket LhsPacket4Packing; + + }; + + + + template + struct gebp_kernel + { + typedef mpfr::mpreal mpreal; + + EIGEN_DONT_INLINE + void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB, + Index rows, Index depth, Index cols, const mpreal& alpha, + Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0) + { + if(rows==0 || cols==0 || depth==0) + return; + + mpreal acc1(0,mpfr_get_prec(blockA[0].mpfr_srcptr())), + tmp (0,mpfr_get_prec(blockA[0].mpfr_srcptr())); + + if(strideA==-1) strideA = depth; + if(strideB==-1) strideB = depth; + + for(Index i=0; i +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_FUNCTIONS +#define EIGEN_MATRIX_FUNCTIONS + +#include +#include + +#include "../../Eigen/Core" +#include "../../Eigen/LU" +#include "../../Eigen/Eigenvalues" + +/** + * \defgroup MatrixFunctions_Module Matrix functions module + * \brief This module aims to provide various methods for the computation of + * matrix functions. + * + * To use this module, add + * \code + * #include + * \endcode + * at the start of your source file. + * + * This module defines the following MatrixBase methods. + * - \ref matrixbase_cos "MatrixBase::cos()", for computing the matrix cosine + * - \ref matrixbase_cosh "MatrixBase::cosh()", for computing the matrix hyperbolic cosine + * - \ref matrixbase_exp "MatrixBase::exp()", for computing the matrix exponential + * - \ref matrixbase_log "MatrixBase::log()", for computing the matrix logarithm + * - \ref matrixbase_pow "MatrixBase::pow()", for computing the matrix power + * - \ref matrixbase_matrixfunction "MatrixBase::matrixFunction()", for computing general matrix functions + * - \ref matrixbase_sin "MatrixBase::sin()", for computing the matrix sine + * - \ref matrixbase_sinh "MatrixBase::sinh()", for computing the matrix hyperbolic sine + * - \ref matrixbase_sqrt "MatrixBase::sqrt()", for computing the matrix square root + * + * These methods are the main entry points to this module. + * + * %Matrix functions are defined as follows. Suppose that \f$ f \f$ + * is an entire function (that is, a function on the complex plane + * that is everywhere complex differentiable). Then its Taylor + * series + * \f[ f(0) + f'(0) x + \frac{f''(0)}{2} x^2 + \frac{f'''(0)}{3!} x^3 + \cdots \f] + * converges to \f$ f(x) \f$. 
In this case, we can define the matrix + * function by the same series: + * \f[ f(M) = f(0) + f'(0) M + \frac{f''(0)}{2} M^2 + \frac{f'''(0)}{3!} M^3 + \cdots \f] + * + */ + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "src/MatrixFunctions/MatrixExponential.h" +#include "src/MatrixFunctions/MatrixFunction.h" +#include "src/MatrixFunctions/MatrixSquareRoot.h" +#include "src/MatrixFunctions/MatrixLogarithm.h" +#include "src/MatrixFunctions/MatrixPower.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + + +/** +\page matrixbaseextra_page +\ingroup MatrixFunctions_Module + +\section matrixbaseextra MatrixBase methods defined in the MatrixFunctions module + +The remainder of the page documents the following MatrixBase methods +which are defined in the MatrixFunctions module. + + + +\subsection matrixbase_cos MatrixBase::cos() + +Compute the matrix cosine. + +\code +const MatrixFunctionReturnValue MatrixBase::cos() const +\endcode + +\param[in] M a square matrix. +\returns expression representing \f$ \cos(M) \f$. + +This function computes the matrix cosine. Use ArrayBase::cos() for computing the entry-wise cosine. + +The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cos(). + +\sa \ref matrixbase_sin "sin()" for an example. + + + +\subsection matrixbase_cosh MatrixBase::cosh() + +Compute the matrix hyberbolic cosine. + +\code +const MatrixFunctionReturnValue MatrixBase::cosh() const +\endcode + +\param[in] M a square matrix. +\returns expression representing \f$ \cosh(M) \f$ + +This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cosh(). + +\sa \ref matrixbase_sinh "sinh()" for an example. + + + +\subsection matrixbase_exp MatrixBase::exp() + +Compute the matrix exponential. + +\code +const MatrixExponentialReturnValue MatrixBase::exp() const +\endcode + +\param[in] M matrix whose exponential is to be computed. +\returns expression representing the matrix exponential of \p M. + +The matrix exponential of \f$ M \f$ is defined by +\f[ \exp(M) = \sum_{k=0}^\infty \frac{M^k}{k!}. \f] +The matrix exponential can be used to solve linear ordinary +differential equations: the solution of \f$ y' = My \f$ with the +initial condition \f$ y(0) = y_0 \f$ is given by +\f$ y(t) = \exp(M) y_0 \f$. + +The matrix exponential is different from applying the exp function to all the entries in the matrix. +Use ArrayBase::exp() if you want to do the latter. + +The cost of the computation is approximately \f$ 20 n^3 \f$ for +matrices of size \f$ n \f$. The number 20 depends weakly on the +norm of the matrix. + +The matrix exponential is computed using the scaling-and-squaring +method combined with Padé approximation. The matrix is first +rescaled, then the exponential of the reduced matrix is computed +approximant, and then the rescaling is undone by repeated +squaring. The degree of the Padé approximant is chosen such +that the approximation error is less than the round-off +error. However, errors may accumulate during the squaring phase. + +Details of the algorithm can be found in: Nicholas J. Higham, "The +scaling and squaring method for the matrix exponential revisited," +SIAM J. %Matrix Anal. Applic., 26:1179–1193, +2005. 
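+
+A self-contained sketch of the call (added for illustration, assuming the
+usual unsupported include path; the rotation example below remains the
+reference):
+\code
+#include <unsupported/Eigen/MatrixFunctions>
+#include <iostream>
+
+int main()
+{
+  Eigen::Matrix2d A;
+  A << 0, 1,
+      -1, 0;                          // generator of a planar rotation
+  std::cout << A.exp() << std::endl;  // matrix exponential, not entry-wise exp
+  return 0;
+}
+\endcode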
+ +Example: The following program checks that +\f[ \exp \left[ \begin{array}{ccc} + 0 & \frac14\pi & 0 \\ + -\frac14\pi & 0 & 0 \\ + 0 & 0 & 0 + \end{array} \right] = \left[ \begin{array}{ccc} + \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ + \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ + 0 & 0 & 1 + \end{array} \right]. \f] +This corresponds to a rotation of \f$ \frac14\pi \f$ radians around +the z-axis. + +\include MatrixExponential.cpp +Output: \verbinclude MatrixExponential.out + +\note \p M has to be a matrix of \c float, \c double, `long double` +\c complex, \c complex, or `complex` . + + +\subsection matrixbase_log MatrixBase::log() + +Compute the matrix logarithm. + +\code +const MatrixLogarithmReturnValue MatrixBase::log() const +\endcode + +\param[in] M invertible matrix whose logarithm is to be computed. +\returns expression representing the matrix logarithm root of \p M. + +The matrix logarithm of \f$ M \f$ is a matrix \f$ X \f$ such that +\f$ \exp(X) = M \f$ where exp denotes the matrix exponential. As for +the scalar logarithm, the equation \f$ \exp(X) = M \f$ may have +multiple solutions; this function returns a matrix whose eigenvalues +have imaginary part in the interval \f$ (-\pi,\pi] \f$. + +The matrix logarithm is different from applying the log function to all the entries in the matrix. +Use ArrayBase::log() if you want to do the latter. + +In the real case, the matrix \f$ M \f$ should be invertible and +it should have no eigenvalues which are real and negative (pairs of +complex conjugate eigenvalues are allowed). In the complex case, it +only needs to be invertible. + +This function computes the matrix logarithm using the Schur-Parlett +algorithm as implemented by MatrixBase::matrixFunction(). The +logarithm of an atomic block is computed by MatrixLogarithmAtomic, +which uses direct computation for 1-by-1 and 2-by-2 blocks and an +inverse scaling-and-squaring algorithm for bigger blocks, with the +square roots computed by MatrixBase::sqrt(). + +Details of the algorithm can be found in Section 11.6.2 of: +Nicholas J. Higham, +Functions of Matrices: Theory and Computation, +SIAM 2008. ISBN 978-0-898716-46-7. + +Example: The following program checks that +\f[ \log \left[ \begin{array}{ccc} + \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ + \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ + 0 & 0 & 1 + \end{array} \right] = \left[ \begin{array}{ccc} + 0 & \frac14\pi & 0 \\ + -\frac14\pi & 0 & 0 \\ + 0 & 0 & 0 + \end{array} \right]. \f] +This corresponds to a rotation of \f$ \frac14\pi \f$ radians around +the z-axis. This is the inverse of the example used in the +documentation of \ref matrixbase_exp "exp()". + +\include MatrixLogarithm.cpp +Output: \verbinclude MatrixLogarithm.out + +\note \p M has to be a matrix of \c float, \c double, `long +double`, \c complex, \c complex, or `complex`. + +\sa MatrixBase::exp(), MatrixBase::matrixFunction(), + class MatrixLogarithmAtomic, MatrixBase::sqrt(). + + +\subsection matrixbase_pow MatrixBase::pow() + +Compute the matrix raised to arbitrary real power. + +\code +const MatrixPowerReturnValue MatrixBase::pow(RealScalar p) const +\endcode + +\param[in] M base of the matrix power, should be a square matrix. +\param[in] p exponent of the matrix power. + +The matrix power \f$ M^p \f$ is defined as \f$ \exp(p \log(M)) \f$, +where exp denotes the matrix exponential, and log denotes the matrix +logarithm. This is different from raising all the entries in the matrix +to the p-th power. Use ArrayBase::pow() if you want to do the latter. 
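+
+As an illustration of that distinction (a minimal sketch added here, assuming
+the usual unsupported include path):
+\code
+#include <unsupported/Eigen/MatrixFunctions>
+
+void example(const Eigen::Matrix3d& M)
+{
+  Eigen::Matrix3d A = M.pow(2.0);                   // matrix power: M * M
+  Eigen::Matrix3d B = M.array().pow(2.0).matrix();  // entry-wise square of each coefficient
+}
+\endcode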
+ +If \p p is complex, the scalar type of \p M should be the type of \p +p . \f$ M^p \f$ simply evaluates into \f$ \exp(p \log(M)) \f$. +Therefore, the matrix \f$ M \f$ should meet the conditions to be an +argument of matrix logarithm. + +If \p p is real, it is casted into the real scalar type of \p M. Then +this function computes the matrix power using the Schur-Padé +algorithm as implemented by class MatrixPower. The exponent is split +into integral part and fractional part, where the fractional part is +in the interval \f$ (-1, 1) \f$. The main diagonal and the first +super-diagonal is directly computed. + +If \p M is singular with a semisimple zero eigenvalue and \p p is +positive, the Schur factor \f$ T \f$ is reordered with Givens +rotations, i.e. + +\f[ T = \left[ \begin{array}{cc} + T_1 & T_2 \\ + 0 & 0 + \end{array} \right] \f] + +where \f$ T_1 \f$ is invertible. Then \f$ T^p \f$ is given by + +\f[ T^p = \left[ \begin{array}{cc} + T_1^p & T_1^{-1} T_1^p T_2 \\ + 0 & 0 + \end{array}. \right] \f] + +\warning Fractional power of a matrix with a non-semisimple zero +eigenvalue is not well-defined. We introduce an assertion failure +against inaccurate result, e.g. \code +#include +#include + +int main() +{ + Eigen::Matrix4d A; + A << 0, 0, 2, 3, + 0, 0, 4, 5, + 0, 0, 6, 7, + 0, 0, 8, 9; + std::cout << A.pow(0.37) << std::endl; + + // The 1 makes eigenvalue 0 non-semisimple. + A.coeffRef(0, 1) = 1; + + // This fails if EIGEN_NO_DEBUG is undefined. + std::cout << A.pow(0.37) << std::endl; + + return 0; +} +\endcode + +Details of the algorithm can be found in: Nicholas J. Higham and +Lijing Lin, "A Schur-Padé algorithm for fractional powers of a +matrix," SIAM J. %Matrix Anal. Applic., +32(3):1056–1078, 2011. + +Example: The following program checks that +\f[ \left[ \begin{array}{ccc} + \cos1 & -\sin1 & 0 \\ + \sin1 & \cos1 & 0 \\ + 0 & 0 & 1 + \end{array} \right]^{\frac14\pi} = \left[ \begin{array}{ccc} + \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ + \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ + 0 & 0 & 1 + \end{array} \right]. \f] +This corresponds to \f$ \frac14\pi \f$ rotations of 1 radian around +the z-axis. + +\include MatrixPower.cpp +Output: \verbinclude MatrixPower.out + +MatrixBase::pow() is user-friendly. However, there are some +circumstances under which you should use class MatrixPower directly. +MatrixPower can save the result of Schur decomposition, so it's +better for computing various powers for the same matrix. + +Example: +\include MatrixPower_optimal.cpp +Output: \verbinclude MatrixPower_optimal.out + +\note \p M has to be a matrix of \c float, \c double, `long +double`, \c complex, \c complex, or +\c complex . + +\sa MatrixBase::exp(), MatrixBase::log(), class MatrixPower. + + +\subsection matrixbase_matrixfunction MatrixBase::matrixFunction() + +Compute a matrix function. + +\code +const MatrixFunctionReturnValue MatrixBase::matrixFunction(typename internal::stem_function::Scalar>::type f) const +\endcode + +\param[in] M argument of matrix function, should be a square matrix. +\param[in] f an entire function; \c f(x,n) should compute the n-th +derivative of f at x. +\returns expression representing \p f applied to \p M. + +Suppose that \p M is a matrix whose entries have type \c Scalar. +Then, the second argument, \p f, should be a function with prototype +\code +ComplexScalar f(ComplexScalar, int) +\endcode +where \c ComplexScalar = \c std::complex if \c Scalar is +real (e.g., \c float or \c double) and \c ComplexScalar = +\c Scalar if \c Scalar is complex. 
The return value of \c f(x,n) +should be \f$ f^{(n)}(x) \f$, the n-th derivative of f at x. + +This routine uses the algorithm described in: +Philip Davies and Nicholas J. Higham, +"A Schur-Parlett algorithm for computing matrix functions", +SIAM J. %Matrix Anal. Applic., 25:464–485, 2003. + +The actual work is done by the MatrixFunction class. + +Example: The following program checks that +\f[ \exp \left[ \begin{array}{ccc} + 0 & \frac14\pi & 0 \\ + -\frac14\pi & 0 & 0 \\ + 0 & 0 & 0 + \end{array} \right] = \left[ \begin{array}{ccc} + \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ + \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ + 0 & 0 & 1 + \end{array} \right]. \f] +This corresponds to a rotation of \f$ \frac14\pi \f$ radians around +the z-axis. This is the same example as used in the documentation +of \ref matrixbase_exp "exp()". + +\include MatrixFunction.cpp +Output: \verbinclude MatrixFunction.out + +Note that the function \c expfn is defined for complex numbers +\c x, even though the matrix \c A is over the reals. Instead of +\c expfn, we could also have used StdStemFunctions::exp: +\code +A.matrixFunction(StdStemFunctions >::exp, &B); +\endcode + + + +\subsection matrixbase_sin MatrixBase::sin() + +Compute the matrix sine. + +\code +const MatrixFunctionReturnValue MatrixBase::sin() const +\endcode + +\param[in] M a square matrix. +\returns expression representing \f$ \sin(M) \f$. + +This function computes the matrix sine. Use ArrayBase::sin() for computing the entry-wise sine. + +The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sin(). + +Example: \include MatrixSine.cpp +Output: \verbinclude MatrixSine.out + + + +\subsection matrixbase_sinh MatrixBase::sinh() + +Compute the matrix hyperbolic sine. + +\code +MatrixFunctionReturnValue MatrixBase::sinh() const +\endcode + +\param[in] M a square matrix. +\returns expression representing \f$ \sinh(M) \f$ + +This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sinh(). + +Example: \include MatrixSinh.cpp +Output: \verbinclude MatrixSinh.out + + +\subsection matrixbase_sqrt MatrixBase::sqrt() + +Compute the matrix square root. + +\code +const MatrixSquareRootReturnValue MatrixBase::sqrt() const +\endcode + +\param[in] M invertible matrix whose square root is to be computed. +\returns expression representing the matrix square root of \p M. + +The matrix square root of \f$ M \f$ is the matrix \f$ M^{1/2} \f$ +whose square is the original matrix; so if \f$ S = M^{1/2} \f$ then +\f$ S^2 = M \f$. This is different from taking the square root of all +the entries in the matrix; use ArrayBase::sqrt() if you want to do the +latter. + +In the real case, the matrix \f$ M \f$ should be invertible and +it should have no eigenvalues which are real and negative (pairs of +complex conjugate eigenvalues are allowed). In that case, the matrix +has a square root which is also real, and this is the square root +computed by this function. + +The matrix square root is computed by first reducing the matrix to +quasi-triangular form with the real Schur decomposition. The square +root of the quasi-triangular matrix can then be computed directly. The +cost is approximately \f$ 25 n^3 \f$ real flops for the real Schur +decomposition and \f$ 3\frac13 n^3 \f$ real flops for the remainder +(though the computation time in practice is likely more than this +indicates). + +Details of the algorithm can be found in: Nicholas J. 
Highan, +"Computing real square roots of a real matrix", Linear Algebra +Appl., 88/89:405–430, 1987. + +If the matrix is positive-definite symmetric, then the square +root is also positive-definite symmetric. In this case, it is best to +use SelfAdjointEigenSolver::operatorSqrt() to compute it. + +In the complex case, the matrix \f$ M \f$ should be invertible; +this is a restriction of the algorithm. The square root computed by +this algorithm is the one whose eigenvalues have an argument in the +interval \f$ (-\frac12\pi, \frac12\pi] \f$. This is the usual branch +cut. + +The computation is the same as in the real case, except that the +complex Schur decomposition is used to reduce the matrix to a +triangular matrix. The theoretical cost is the same. Details are in: +Åke Björck and Sven Hammarling, "A Schur method for the +square root of a matrix", Linear Algebra Appl., +52/53:127–140, 1983. + +Example: The following program checks that the square root of +\f[ \left[ \begin{array}{cc} + \cos(\frac13\pi) & -\sin(\frac13\pi) \\ + \sin(\frac13\pi) & \cos(\frac13\pi) + \end{array} \right], \f] +corresponding to a rotation over 60 degrees, is a rotation over 30 degrees: +\f[ \left[ \begin{array}{cc} + \cos(\frac16\pi) & -\sin(\frac16\pi) \\ + \sin(\frac16\pi) & \cos(\frac16\pi) + \end{array} \right]. \f] + +\include MatrixSquareRoot.cpp +Output: \verbinclude MatrixSquareRoot.out + +\sa class RealSchur, class ComplexSchur, class MatrixSquareRoot, + SelfAdjointEigenSolver::operatorSqrt(). + +*/ + +#endif // EIGEN_MATRIX_FUNCTIONS + diff --git a/external/unsupported/Eigen/MoreVectorization b/external/unsupported/Eigen/MoreVectorization new file mode 100644 index 0000000..7662b47 --- /dev/null +++ b/external/unsupported/Eigen/MoreVectorization @@ -0,0 +1,24 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MOREVECTORIZATION_MODULE_H +#define EIGEN_MOREVECTORIZATION_MODULE_H + +#include "../../Eigen/Core" + +namespace Eigen { + +/** + * \defgroup MoreVectorization More vectorization module + */ + +} + +#include "src/MoreVectorization/MathFunctions.h" + +#endif // EIGEN_MOREVECTORIZATION_MODULE_H diff --git a/external/unsupported/Eigen/NonLinearOptimization b/external/unsupported/Eigen/NonLinearOptimization new file mode 100644 index 0000000..961f192 --- /dev/null +++ b/external/unsupported/Eigen/NonLinearOptimization @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NONLINEAROPTIMIZATION_MODULE +#define EIGEN_NONLINEAROPTIMIZATION_MODULE + +#include + +#include "../../Eigen/Core" +#include "../../Eigen/Jacobi" +#include "../../Eigen/QR" +#include "NumericalDiff" + +/** + * \defgroup NonLinearOptimization_Module Non linear optimization module + * + * \code + * #include + * \endcode + * + * This module provides implementation of two important algorithms in non linear + * optimization. In both cases, we consider a system of non linear functions. 
Of + * course, this should work, and even work very well if those functions are + * actually linear. But if this is so, you should probably better use other + * methods more fitted to this special case. + * + * One algorithm allows to find a least-squares solution of such a system + * (Levenberg-Marquardt algorithm) and the second one is used to find + * a zero for the system (Powell hybrid "dogleg" method). + * + * This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK). + * Minpack is a very famous, old, robust and well renowned package, written in + * fortran. Those implementations have been carefully tuned, tested, and used + * for several decades. + * + * The original fortran code was automatically translated using f2c (http://en.wikipedia.org/wiki/F2c) in C, + * then c++, and then cleaned by several different authors. + * The last one of those cleanings being our starting point : + * http://devernay.free.fr/hacks/cminpack.html + * + * Finally, we ported this code to Eigen, creating classes and API + * coherent with Eigen. When possible, we switched to Eigen + * implementation, such as most linear algebra (vectors, matrices, stable norms). + * + * Doing so, we were very careful to check the tests we setup at the very + * beginning, which ensure that the same results are found. + * + * \section Tests Tests + * + * The tests are placed in the file unsupported/test/NonLinear.cpp. + * + * There are two kinds of tests : those that come from examples bundled with cminpack. + * They guaranty we get the same results as the original algorithms (value for 'x', + * for the number of evaluations of the function, and for the number of evaluations + * of the Jacobian if ever). + * + * Other tests were added by myself at the very beginning of the + * process and check the results for Levenberg-Marquardt using the reference data + * on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've + * carefully checked that the same results were obtained when modifying the + * code. Please note that we do not always get the exact same decimals as they do, + * but this is ok : they use 128bits float, and we do the tests using the C type 'double', + * which is 64 bits on most platforms (x86 and amd64, at least). + * I've performed those tests on several other implementations of Levenberg-Marquardt, and + * (c)minpack performs VERY well compared to those, both in accuracy and speed. + * + * The documentation for running the tests is on the wiki + * http://eigen.tuxfamily.org/index.php?title=Tests + * + * \section API API: overview of methods + * + * Both algorithms needs a functor computing the Jacobian. It can be computed by + * hand, using auto-differentiation (see \ref AutoDiff_Module), or using numerical + * differences (see \ref NumericalDiff_Module). For instance: + *\code + * MyFunc func; + * NumericalDiff func_with_num_diff(func); + * LevenbergMarquardt > lm(func_with_num_diff); + * \endcode + * For HybridNonLinearSolver, the method solveNumericalDiff() does the above wrapping for + * you. + * + * The methods LevenbergMarquardt.lmder1()/lmdif1()/lmstr1() and + * HybridNonLinearSolver.hybrj1()/hybrd1() are specific methods from the original + * minpack package that you probably should NOT use until you are porting a code that + * was previously using minpack. They just define a 'simple' API with default values + * for some parameters. 
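+ *
+ * As a rough, self-contained sketch (the functor name, problem sizes and residuals below
+ * are purely illustrative and not part of the module), a full least-squares run using
+ * numerical differentiation could look like this:
+ * \code
+ * struct MyFunctor
+ * {
+ *   typedef double Scalar;
+ *   typedef Eigen::VectorXd InputType;
+ *   typedef Eigen::VectorXd ValueType;
+ *   typedef Eigen::MatrixXd JacobianType;
+ *   enum { InputsAtCompileTime = Eigen::Dynamic, ValuesAtCompileTime = Eigen::Dynamic };
+ *
+ *   int inputs() const { return 2; }   // number of unknowns
+ *   int values() const { return 3; }   // number of residuals
+ *
+ *   // writes the residuals f_i(x) into fvec
+ *   int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const
+ *   {
+ *     fvec(0) = x(0) - 1.0;
+ *     fvec(1) = x(1) - 2.0;
+ *     fvec(2) = x(0) * x(1) - 2.0;
+ *     return 0;
+ *   }
+ * };
+ *
+ * MyFunctor functor;
+ * Eigen::NumericalDiff<MyFunctor> numDiff(functor);
+ * Eigen::LevenbergMarquardt<Eigen::NumericalDiff<MyFunctor> > lm(numDiff);
+ * Eigen::VectorXd x(2);
+ * x.setZero();
+ * lm.minimize(x);   // x converges towards (1, 2), where all residuals vanish
+ * \endcode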
+ * + * All algorithms are provided using two APIs : + * - one where the user inits the algorithm, and uses '*OneStep()' as much as he wants : + * this way the caller have control over the steps + * - one where the user just calls a method (optimize() or solve()) which will + * handle the loop: init + loop until a stop condition is met. Those are provided for + * convenience. + * + * As an example, the method LevenbergMarquardt::minimize() is + * implemented as follow: + * \code + * Status LevenbergMarquardt::minimize(FVectorType &x, const int mode) + * { + * Status status = minimizeInit(x, mode); + * do { + * status = minimizeOneStep(x, mode); + * } while (status==Running); + * return status; + * } + * \endcode + * + * \section examples Examples + * + * The easiest way to understand how to use this module is by looking at the many examples in the file + * unsupported/test/NonLinearOptimization.cpp. + */ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +#include "src/NonLinearOptimization/qrsolv.h" +#include "src/NonLinearOptimization/r1updt.h" +#include "src/NonLinearOptimization/r1mpyq.h" +#include "src/NonLinearOptimization/rwupdt.h" +#include "src/NonLinearOptimization/fdjac1.h" +#include "src/NonLinearOptimization/lmpar.h" +#include "src/NonLinearOptimization/dogleg.h" +#include "src/NonLinearOptimization/covar.h" + +#include "src/NonLinearOptimization/chkder.h" + +#endif + +#include "src/NonLinearOptimization/HybridNonLinearSolver.h" +#include "src/NonLinearOptimization/LevenbergMarquardt.h" + + +#endif // EIGEN_NONLINEAROPTIMIZATION_MODULE diff --git a/external/unsupported/Eigen/NumericalDiff b/external/unsupported/Eigen/NumericalDiff new file mode 100644 index 0000000..0668f96 --- /dev/null +++ b/external/unsupported/Eigen/NumericalDiff @@ -0,0 +1,56 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMERICALDIFF_MODULE +#define EIGEN_NUMERICALDIFF_MODULE + +#include "../../Eigen/Core" + +namespace Eigen { + +/** + * \defgroup NumericalDiff_Module Numerical differentiation module + * + * \code + * #include + * \endcode + * + * See http://en.wikipedia.org/wiki/Numerical_differentiation + * + * Warning : this should NOT be confused with automatic differentiation, which + * is a different method and has its own module in Eigen : \ref + * AutoDiff_Module. + * + * Currently only "Forward" and "Central" schemes are implemented. Those + * are basic methods, and there exist some more elaborated way of + * computing such approximates. They are implemented using both + * proprietary and free software, and usually requires linking to an + * external library. It is very easy for you to write a functor + * using such software, and the purpose is quite orthogonal to what we + * want to achieve with Eigen. + * + * This is why we will not provide wrappers for every great numerical + * differentiation software that exist, but should rather stick with those + * basic ones, that still are useful for testing. + * + * Also, the \ref NonLinearOptimization_Module needs this in order to + * provide full features compatibility with the original (c)minpack + * package. 
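+ *
+ * As a minimal sketch (reusing the illustrative \c MyFunctor interface shown in the
+ * \ref NonLinearOptimization_Module documentation; it is not part of this module), a
+ * finite-difference Jacobian can be obtained roughly as follows:
+ * \code
+ * MyFunctor functor;                                             // illustrative functor
+ * Eigen::NumericalDiff<MyFunctor> fwd(functor);                  // "Forward" scheme (the default)
+ * Eigen::NumericalDiff<MyFunctor, Eigen::Central> ctr(functor);  // "Central" scheme
+ *
+ * Eigen::VectorXd x = Eigen::VectorXd::Ones(functor.inputs());
+ * Eigen::MatrixXd jac(functor.values(), functor.inputs());
+ * fwd.df(x, jac);   // jac now holds the approximate Jacobian at x
+ * \endcode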
+ * + */ +} + +//@{ + +#include "src/NumericalDiff/NumericalDiff.h" + +//@} + + +#endif // EIGEN_NUMERICALDIFF_MODULE diff --git a/external/unsupported/Eigen/OpenGLSupport b/external/unsupported/Eigen/OpenGLSupport new file mode 100644 index 0000000..f8c2130 --- /dev/null +++ b/external/unsupported/Eigen/OpenGLSupport @@ -0,0 +1,322 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_OPENGL_MODULE +#define EIGEN_OPENGL_MODULE + +#include "../../Eigen/Geometry" + +#if defined(__APPLE_CC__) + #include +#else + #include +#endif + +namespace Eigen { + +/** + * \defgroup OpenGLSUpport_Module OpenGL Support module + * + * This module provides wrapper functions for a couple of OpenGL functions + * which simplify the way to pass Eigen's object to openGL. + * Here is an example: + * + * \code + * // You need to add path_to_eigen/unsupported to your include path. + * #include + * // ... + * Vector3f x, y; + * Matrix3f rot; + * + * glVertex(y + x * rot); + * + * Quaternion q; + * glRotate(q); + * + * // ... + * \endcode + * + */ +//@{ + +#define EIGEN_GL_FUNC_DECLARATION(FUNC) \ +namespace internal { \ + template< typename XprType, \ + typename Scalar = typename XprType::Scalar, \ + int Rows = XprType::RowsAtCompileTime, \ + int Cols = XprType::ColsAtCompileTime, \ + bool IsGLCompatible = bool(internal::evaluator::Flags&LinearAccessBit) \ + && bool(XprType::Flags&DirectAccessBit) \ + && (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \ + struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \ + \ + template \ + struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(const XprType& p) { \ + EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)::type>::run(p); } \ + }; \ +} \ + \ +template inline void FUNC(const Eigen::DenseBase& p) { \ + EIGEN_CAT(EIGEN_CAT(internal::gl_,FUNC),_impl)::run(p.derived()); \ +} + + +#define EIGEN_GL_FUNC_SPECIALIZATION_MAT(FUNC,SCALAR,ROWS,COLS,SUFFIX) \ +namespace internal { \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ + }; \ +} + + +#define EIGEN_GL_FUNC_SPECIALIZATION_VEC(FUNC,SCALAR,SIZE,SUFFIX) \ +namespace internal { \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ + }; \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ + }; \ +} + + +EIGEN_GL_FUNC_DECLARATION (glVertex) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 2,2iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 2,2sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 2,2fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 2,2dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 3,3iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 3,3sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 3,3dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 4,4iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 4,4sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 4,4fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 4,4dv) + 
+EIGEN_GL_FUNC_DECLARATION (glTexCoord) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 2,2iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 2,2sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 2,2fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 2,2dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 3,3iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 3,3sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 3,3dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 4,4iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 4,4sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 4,4fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 4,4dv) + +EIGEN_GL_FUNC_DECLARATION (glColor) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 2,2iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 2,2sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 2,2fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 2,2dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 3,3iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 3,3sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 3,3dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 4,4iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 4,4sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 4,4fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 4,4dv) + +EIGEN_GL_FUNC_DECLARATION (glNormal) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,int, 3,3iv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,short, 3,3sv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,double, 3,3dv) + +inline void glScale2fv(const float* v) { glScalef(v[0], v[1], 1.f); } +inline void glScale2dv(const double* v) { glScaled(v[0], v[1], 1.0); } +inline void glScale3fv(const float* v) { glScalef(v[0], v[1], v[2]); } +inline void glScale3dv(const double* v) { glScaled(v[0], v[1], v[2]); } + +EIGEN_GL_FUNC_DECLARATION (glScale) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,float, 2,2fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,double, 2,2dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,double, 3,3dv) + +template void glScale(const UniformScaling& s) { glScale(Matrix::Constant(s.factor())); } + +inline void glTranslate2fv(const float* v) { glTranslatef(v[0], v[1], 0.f); } +inline void glTranslate2dv(const double* v) { glTranslated(v[0], v[1], 0.0); } +inline void glTranslate3fv(const float* v) { glTranslatef(v[0], v[1], v[2]); } +inline void glTranslate3dv(const double* v) { glTranslated(v[0], v[1], v[2]); } + +EIGEN_GL_FUNC_DECLARATION (glTranslate) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,float, 2,2fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,double, 2,2dv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,float, 3,3fv) +EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,double, 3,3dv) + +template void glTranslate(const Translation& t) { glTranslate(t.vector()); } +template void glTranslate(const Translation& t) { glTranslate(t.vector()); } + +EIGEN_GL_FUNC_DECLARATION (glMultMatrix) +EIGEN_GL_FUNC_SPECIALIZATION_MAT(glMultMatrix,float, 4,4,f) +EIGEN_GL_FUNC_SPECIALIZATION_MAT(glMultMatrix,double, 4,4,d) + +template void glMultMatrix(const Transform& t) { glMultMatrix(t.matrix()); } +template void glMultMatrix(const Transform& t) { glMultMatrix(t.matrix()); } +template void glMultMatrix(const Transform& t) { glMultMatrix(Transform(t).matrix()); } + 
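+// Rough usage sketch (assumes an active OpenGL context and the legacy immediate-mode /
+// fixed-function API; the values are illustrative): an Eigen transform can be applied to
+// the current GL matrix directly, e.g.
+//
+//   Eigen::Affine3f t = Eigen::Translation3f(1.f, 0.f, 0.f)
+//                     * Eigen::AngleAxisf(0.5f, Eigen::Vector3f::UnitZ());
+//   glMultMatrix(t);   // forwards t.matrix() to glMultMatrixf
+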
+EIGEN_GL_FUNC_DECLARATION (glLoadMatrix) +EIGEN_GL_FUNC_SPECIALIZATION_MAT(glLoadMatrix,float, 4,4,f) +EIGEN_GL_FUNC_SPECIALIZATION_MAT(glLoadMatrix,double, 4,4,d) + +template void glLoadMatrix(const Transform& t) { glLoadMatrix(t.matrix()); } +template void glLoadMatrix(const Transform& t) { glLoadMatrix(t.matrix()); } +template void glLoadMatrix(const Transform& t) { glLoadMatrix(Transform(t).matrix()); } + +inline void glRotate(const Rotation2D& rot) +{ + glRotatef(rot.angle()*180.f/float(EIGEN_PI), 0.f, 0.f, 1.f); +} +inline void glRotate(const Rotation2D& rot) +{ + glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0); +} + +template void glRotate(const RotationBase& rot) +{ + Transform tr(rot); + glMultMatrix(tr.matrix()); +} + +#define EIGEN_GL_MAKE_CONST_const const +#define EIGEN_GL_MAKE_CONST__ +#define EIGEN_GL_EVAL(X) X + +#define EIGEN_GL_FUNC1_DECLARATION(FUNC,ARG1,CONST) \ +namespace internal { \ + template< typename XprType, \ + typename Scalar = typename XprType::Scalar, \ + int Rows = XprType::RowsAtCompileTime, \ + int Cols = XprType::ColsAtCompileTime, \ + bool IsGLCompatible = bool(internal::evaluator::Flags&LinearAccessBit) \ + && bool(XprType::Flags&DirectAccessBit) \ + && (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \ + struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \ + \ + template \ + struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(ARG1 a,EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { \ + EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)::type>::run(a,p); } \ + }; \ +} \ + \ +template inline void FUNC(ARG1 a,EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) Eigen::DenseBase& p) { \ + EIGEN_CAT(EIGEN_CAT(internal::gl_,FUNC),_impl)::run(a,p.derived()); \ +} + + +#define EIGEN_GL_FUNC1_SPECIALIZATION_MAT(FUNC,ARG1,CONST,SCALAR,ROWS,COLS,SUFFIX) \ +namespace internal { \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ + }; \ +} + + +#define EIGEN_GL_FUNC1_SPECIALIZATION_VEC(FUNC,ARG1,CONST,SCALAR,SIZE,SUFFIX) \ +namespace internal { \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ + }; \ + template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl) { \ + inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ + }; \ +} + +EIGEN_GL_FUNC1_DECLARATION (glGet,GLenum,_) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glGet,GLenum,_,float, 4,4,Floatv) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glGet,GLenum,_,double, 4,4,Doublev) + +// glUniform API + +#ifdef GL_VERSION_2_0 + +inline void glUniform2fv_ei (GLint loc, const float* v) { glUniform2fv(loc,1,v); } +inline void glUniform2iv_ei (GLint loc, const int* v) { glUniform2iv(loc,1,v); } + +inline void glUniform3fv_ei (GLint loc, const float* v) { glUniform3fv(loc,1,v); } +inline void glUniform3iv_ei (GLint loc, const int* v) { glUniform3iv(loc,1,v); } + +inline void glUniform4fv_ei (GLint loc, const float* v) { glUniform4fv(loc,1,v); } +inline void glUniform4iv_ei (GLint loc, const int* v) { glUniform4iv(loc,1,v); } + +inline void glUniformMatrix2fv_ei (GLint loc, const float* v) { glUniformMatrix2fv(loc,1,false,v); } +inline void glUniformMatrix3fv_ei (GLint loc, const float* v) { glUniformMatrix3fv(loc,1,false,v); } +inline void 
glUniformMatrix4fv_ei (GLint loc, const float* v) { glUniformMatrix4fv(loc,1,false,v); } + + +EIGEN_GL_FUNC1_DECLARATION (glUniform,GLint,const) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 2,2fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 2,2iv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 3,3fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 3,3iv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 4,4fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 4,4iv_ei) + +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,2,Matrix2fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,3,Matrix3fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,4,Matrix4fv_ei) + +#endif + +#ifdef GL_VERSION_2_1 + +inline void glUniformMatrix2x3fv_ei(GLint loc, const float* v) { glUniformMatrix2x3fv(loc,1,false,v); } +inline void glUniformMatrix3x2fv_ei(GLint loc, const float* v) { glUniformMatrix3x2fv(loc,1,false,v); } +inline void glUniformMatrix2x4fv_ei(GLint loc, const float* v) { glUniformMatrix2x4fv(loc,1,false,v); } +inline void glUniformMatrix4x2fv_ei(GLint loc, const float* v) { glUniformMatrix4x2fv(loc,1,false,v); } +inline void glUniformMatrix3x4fv_ei(GLint loc, const float* v) { glUniformMatrix3x4fv(loc,1,false,v); } +inline void glUniformMatrix4x3fv_ei(GLint loc, const float* v) { glUniformMatrix4x3fv(loc,1,false,v); } + +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,3,Matrix2x3fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,2,Matrix3x2fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,4,Matrix2x4fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,2,Matrix4x2fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,4,Matrix3x4fv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,3,Matrix4x3fv_ei) + +#endif + +#ifdef GL_VERSION_3_0 + +inline void glUniform2uiv_ei (GLint loc, const unsigned int* v) { glUniform2uiv(loc,1,v); } +inline void glUniform3uiv_ei (GLint loc, const unsigned int* v) { glUniform3uiv(loc,1,v); } +inline void glUniform4uiv_ei (GLint loc, const unsigned int* v) { glUniform4uiv(loc,1,v); } + +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 2,2uiv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 3,3uiv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 4,4uiv_ei) + +#endif + +#ifdef GL_ARB_gpu_shader_fp64 +inline void glUniform2dv_ei (GLint loc, const double* v) { glUniform2dv(loc,1,v); } +inline void glUniform3dv_ei (GLint loc, const double* v) { glUniform3dv(loc,1,v); } +inline void glUniform4dv_ei (GLint loc, const double* v) { glUniform4dv(loc,1,v); } + +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 2,2dv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 3,3dv_ei) +EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 4,4dv_ei) +#endif + + +//@} + +} + +#endif // EIGEN_OPENGL_MODULE diff --git a/external/unsupported/Eigen/Polynomials b/external/unsupported/Eigen/Polynomials new file mode 100644 index 0000000..32ce2a2 --- /dev/null +++ b/external/unsupported/Eigen/Polynomials @@ -0,0 +1,137 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_POLYNOMIALS_MODULE_H +#define EIGEN_POLYNOMIALS_MODULE_H + +#include "../../Eigen/Core" + +#include "../../Eigen/Eigenvalues" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +// Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module +#if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2) + #ifndef EIGEN_HIDE_HEAVY_CODE + #define EIGEN_HIDE_HEAVY_CODE + #endif +#elif defined EIGEN_HIDE_HEAVY_CODE + #undef EIGEN_HIDE_HEAVY_CODE +#endif + +/** + * \defgroup Polynomials_Module Polynomials module + * \brief This module provides a QR based polynomial solver. + * + * To use this module, add + * \code + * #include + * \endcode + * at the start of your source file. + */ + +#include "src/Polynomials/PolynomialUtils.h" +#include "src/Polynomials/Companion.h" +#include "src/Polynomials/PolynomialSolver.h" + +/** + \page polynomials Polynomials defines functions for dealing with polynomials + and a QR based polynomial solver. + \ingroup Polynomials_Module + + The remainder of the page documents first the functions for evaluating, computing + polynomials, computing estimates about polynomials and next the QR based polynomial + solver. + + \section polynomialUtils convenient functions to deal with polynomials + \subsection roots_to_monicPolynomial + The function + \code + void roots_to_monicPolynomial( const RootVector& rv, Polynomial& poly ) + \endcode + computes the coefficients \f$ a_i \f$ of + + \f$ p(x) = a_0 + a_{1}x + ... + a_{n-1}x^{n-1} + x^n \f$ + + where \f$ p \f$ is known through its roots i.e. \f$ p(x) = (x-r_1)(x-r_2)...(x-r_n) \f$. + + \subsection poly_eval + The function + \code + T poly_eval( const Polynomials& poly, const T& x ) + \endcode + evaluates a polynomial at a given point using stabilized Hörner method. + + The following code: first computes the coefficients in the monomial basis of the monic polynomial that has the provided roots; + then, it evaluates the computed polynomial, using a stabilized Hörner method. + + \include PolynomialUtils1.cpp + Output: \verbinclude PolynomialUtils1.out + + \subsection Cauchy bounds + The function + \code + Real cauchy_max_bound( const Polynomial& poly ) + \endcode + provides a maximum bound (the Cauchy one: \f$C(p)\f$) for the absolute value of a root of the given polynomial i.e. + \f$ \forall r_i \f$ root of \f$ p(x) = \sum_{k=0}^d a_k x^k \f$, + \f$ |r_i| \le C(p) = \sum_{k=0}^{d} \left | \frac{a_k}{a_d} \right | \f$ + The leading coefficient \f$ p \f$: should be non zero \f$a_d \neq 0\f$. + + + The function + \code + Real cauchy_min_bound( const Polynomial& poly ) + \endcode + provides a minimum bound (the Cauchy one: \f$c(p)\f$) for the absolute value of a non zero root of the given polynomial i.e. + \f$ \forall r_i \neq 0 \f$ root of \f$ p(x) = \sum_{k=0}^d a_k x^k \f$, + \f$ |r_i| \ge c(p) = \left( \sum_{k=0}^{d} \left | \frac{a_k}{a_0} \right | \right)^{-1} \f$ + + + + + \section QR polynomial solver class + Computes the complex roots of a polynomial by computing the eigenvalues of the associated companion matrix with the QR algorithm. 
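+
+  A minimal usage sketch, before going into the details (the coefficients below are
+  illustrative; they are listed in order of increasing degree, as expected by the solver):
+  \code
+  #include <unsupported/Eigen/Polynomials>
+  // p(x) = 2x^2 - 3x + 1 = (2x - 1)(x - 1), coefficients from degree 0 upwards
+  Eigen::Vector3d coeffs(1.0, -3.0, 2.0);
+  Eigen::PolynomialSolver<double, 2> solver(coeffs);
+  std::cout << solver.roots() << std::endl;   // complex roots, here 0.5 and 1
+  \endcode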
+ + The roots of \f$ p(x) = a_0 + a_1 x + a_2 x^2 + a_{3} x^3 + x^4 \f$ are the eigenvalues of + \f$ + \left [ + \begin{array}{cccc} + 0 & 0 & 0 & a_0 \\ + 1 & 0 & 0 & a_1 \\ + 0 & 1 & 0 & a_2 \\ + 0 & 0 & 1 & a_3 + \end{array} \right ] + \f$ + + However, the QR algorithm is not guaranteed to converge when there are several eigenvalues with same modulus. + + Therefore the current polynomial solver is guaranteed to provide a correct result only when the complex roots \f$r_1,r_2,...,r_d\f$ have distinct moduli i.e. + + \f$ \forall i,j \in [1;d],~ \| r_i \| \neq \| r_j \| \f$. + + With 32bit (float) floating types this problem shows up frequently. + However, almost always, correct accuracy is reached even in these cases for 64bit + (double) floating types and small polynomial degree (<20). + + \include PolynomialSolver1.cpp + + In the above example: + + -# a simple use of the polynomial solver is shown; + -# the accuracy problem with the QR algorithm is presented: a polynomial with almost conjugate roots is provided to the solver. + Those roots have almost same module therefore the QR algorithm failed to converge: the accuracy + of the last root is bad; + -# a simple way to circumvent the problem is shown: use doubles instead of floats. + + Output: \verbinclude PolynomialSolver1.out +*/ + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_POLYNOMIALS_MODULE_H diff --git a/external/unsupported/Eigen/Skyline b/external/unsupported/Eigen/Skyline new file mode 100644 index 0000000..ebdf143 --- /dev/null +++ b/external/unsupported/Eigen/Skyline @@ -0,0 +1,39 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKYLINE_MODULE_H +#define EIGEN_SKYLINE_MODULE_H + + +#include "../../Eigen/Core" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include + +/** + * \defgroup Skyline_Module Skyline module + * + * + * + * + */ + +#include "src/Skyline/SkylineUtil.h" +#include "src/Skyline/SkylineMatrixBase.h" +#include "src/Skyline/SkylineStorage.h" +#include "src/Skyline/SkylineMatrix.h" +#include "src/Skyline/SkylineInplaceLU.h" +#include "src/Skyline/SkylineProduct.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SKYLINE_MODULE_H diff --git a/external/unsupported/Eigen/SparseExtra b/external/unsupported/Eigen/SparseExtra new file mode 100644 index 0000000..ba5cbd6 --- /dev/null +++ b/external/unsupported/Eigen/SparseExtra @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSE_EXTRA_MODULE_H +#define EIGEN_SPARSE_EXTRA_MODULE_H + +#include "../../Eigen/Sparse" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef EIGEN_GOOGLEHASH_SUPPORT + #include + #include +#endif + +/** + * \defgroup SparseExtra_Module SparseExtra module + * + * This module contains some experimental features extending the sparse module. 
+ * + * \code + * #include + * \endcode + */ + + +#include "src/SparseExtra/DynamicSparseMatrix.h" +#include "src/SparseExtra/BlockOfDynamicSparseMatrix.h" +#include "src/SparseExtra/RandomSetter.h" + +#include "src/SparseExtra/MarketIO.h" + +#if !defined(_WIN32) +#include +#include "src/SparseExtra/MatrixMarketIterator.h" +#endif + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSE_EXTRA_MODULE_H diff --git a/external/unsupported/Eigen/SpecialFunctions b/external/unsupported/Eigen/SpecialFunctions new file mode 100644 index 0000000..f6a2460 --- /dev/null +++ b/external/unsupported/Eigen/SpecialFunctions @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIALFUNCTIONS_MODULE +#define EIGEN_SPECIALFUNCTIONS_MODULE + +#include + +#include "../../Eigen/Core" + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +namespace Eigen { + +/** + * \defgroup SpecialFunctions_Module Special math functions module + * + * This module features additional coefficient-wise math functions available + * within the numext:: namespace for the scalar version, and as method and/or free + * functions of Array. Those include: + * + * - erf + * - erfc + * - lgamma + * - igamma + * - igamma_der_a + * - gamma_sample_der_alpha + * - igammac + * - digamma + * - ndtri + * - polygamma + * - zeta + * - betainc + * + * Bessel Functions + * - bessel_i0 + * - bessel_i0e + * - bessel_i1 + * - bessel_i1e + * - bessel_j0 + * - bessel_j1 + * - bessel_k0 + * - bessel_k0e + * - bessel_k1 + * - bessel_k1e + * - bessel_y0 + * - bessel_y1 + * + * \code + * #include + * \endcode + */ +//@{ + +} + +#include "src/SpecialFunctions/BesselFunctionsImpl.h" +#include "src/SpecialFunctions/BesselFunctionsBFloat16.h" +#include "src/SpecialFunctions/BesselFunctionsHalf.h" +#include "src/SpecialFunctions/BesselFunctionsPacketMath.h" +#include "src/SpecialFunctions/BesselFunctionsFunctors.h" +#include "src/SpecialFunctions/BesselFunctionsArrayAPI.h" +#include "src/SpecialFunctions/SpecialFunctionsImpl.h" +#if defined(EIGEN_HIPCC) +#include "src/SpecialFunctions/HipVectorCompatibility.h" +#endif +#include "src/SpecialFunctions/SpecialFunctionsBFloat16.h" +#include "src/SpecialFunctions/SpecialFunctionsHalf.h" +#include "src/SpecialFunctions/SpecialFunctionsPacketMath.h" +#include "src/SpecialFunctions/SpecialFunctionsFunctors.h" +#include "src/SpecialFunctions/SpecialFunctionsArrayAPI.h" + +#if defined EIGEN_VECTORIZE_AVX512 + #include "src/SpecialFunctions/arch/AVX/BesselFunctions.h" + #include "src/SpecialFunctions/arch/AVX/SpecialFunctions.h" + #include "src/SpecialFunctions/arch/AVX512/BesselFunctions.h" + #include "src/SpecialFunctions/arch/AVX512/SpecialFunctions.h" +#elif defined EIGEN_VECTORIZE_AVX + #include "src/SpecialFunctions/arch/AVX/BesselFunctions.h" + #include "src/SpecialFunctions/arch/AVX/SpecialFunctions.h" +#elif defined EIGEN_VECTORIZE_NEON + #include "src/SpecialFunctions/arch/NEON/BesselFunctions.h" + #include "src/SpecialFunctions/arch/NEON/SpecialFunctions.h" +#endif + +#if defined EIGEN_VECTORIZE_GPU + #include "src/SpecialFunctions/arch/GPU/SpecialFunctions.h" +#endif + +namespace Eigen { +//@} +} + + +#include 
"../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPECIALFUNCTIONS_MODULE diff --git a/external/unsupported/Eigen/Splines b/external/unsupported/Eigen/Splines new file mode 100644 index 0000000..2ca5813 --- /dev/null +++ b/external/unsupported/Eigen/Splines @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20010-2011 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPLINES_MODULE_H +#define EIGEN_SPLINES_MODULE_H + +namespace Eigen +{ +/** + * \defgroup Splines_Module Spline and spline fitting module + * + * This module provides a simple multi-dimensional spline class while + * offering most basic functionality to fit a spline to point sets. + * + * \code + * #include + * \endcode + */ +} + +#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" + +#include "src/Splines/SplineFwd.h" +#include "src/Splines/Spline.h" +#include "src/Splines/SplineFitting.h" + +#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPLINES_MODULE_H diff --git a/external/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/external/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h new file mode 100644 index 0000000..33b6c39 --- /dev/null +++ b/external/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -0,0 +1,108 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_AUTODIFF_JACOBIAN_H +#define EIGEN_AUTODIFF_JACOBIAN_H + +namespace Eigen +{ + +template class AutoDiffJacobian : public Functor +{ +public: + AutoDiffJacobian() : Functor() {} + AutoDiffJacobian(const Functor& f) : Functor(f) {} + + // forward constructors +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + AutoDiffJacobian(const T& ...Values) : Functor(Values...) {} +#else + template + AutoDiffJacobian(const T0& a0) : Functor(a0) {} + template + AutoDiffJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} + template + AutoDiffJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {} +#endif + + typedef typename Functor::InputType InputType; + typedef typename Functor::ValueType ValueType; + typedef typename ValueType::Scalar Scalar; + + enum { + InputsAtCompileTime = InputType::RowsAtCompileTime, + ValuesAtCompileTime = ValueType::RowsAtCompileTime + }; + + typedef Matrix JacobianType; + typedef typename JacobianType::Index Index; + + typedef Matrix DerivativeType; + typedef AutoDiffScalar ActiveScalar; + + typedef Matrix ActiveInput; + typedef Matrix ActiveValue; + +#if EIGEN_HAS_VARIADIC_TEMPLATES + // Some compilers don't accept variadic parameters after a default parameter, + // i.e., we can't just write _jac=0 but we need to overload operator(): + EIGEN_STRONG_INLINE + void operator() (const InputType& x, ValueType* v) const + { + this->operator()(x, v, 0); + } + template + void operator() (const InputType& x, ValueType* v, JacobianType* _jac, + const ParamsType&... 
Params) const +#else + void operator() (const InputType& x, ValueType* v, JacobianType* _jac=0) const +#endif + { + eigen_assert(v!=0); + + if (!_jac) + { +#if EIGEN_HAS_VARIADIC_TEMPLATES + Functor::operator()(x, v, Params...); +#else + Functor::operator()(x, v); +#endif + return; + } + + JacobianType& jac = *_jac; + + ActiveInput ax = x.template cast(); + ActiveValue av(jac.rows()); + + if(InputsAtCompileTime==Dynamic) + for (Index j=0; j +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_AUTODIFF_SCALAR_H +#define EIGEN_AUTODIFF_SCALAR_H + +namespace Eigen { + +namespace internal { + +template +struct make_coherent_impl { + static void run(A&, B&) {} +}; + +// resize a to match b is a.size()==0, and conversely. +template +void make_coherent(const A& a, const B&b) +{ + make_coherent_impl::run(a.const_cast_derived(), b.const_cast_derived()); +} + +template struct auto_diff_special_op; + +} // end namespace internal + +template class AutoDiffScalar; + +template +inline AutoDiffScalar MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) { + return AutoDiffScalar(value,der); +} + +/** \class AutoDiffScalar + * \brief A scalar type replacement with automatic differentiation capability + * + * \param DerivativeType the vector type used to store/represent the derivatives. The base scalar type + * as well as the number of derivatives to compute are determined from this type. + * Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf + * if the number of derivatives is not known at compile time, and/or, the number + * of derivatives is large. + * Note that DerivativeType can also be a reference (e.g., \c VectorXf&) to wrap a + * existing vector into an AutoDiffScalar. + * Finally, DerivativeType can also be any Eigen compatible expression. + * + * This class represents a scalar value while tracking its respective derivatives using Eigen's expression + * template mechanism. + * + * It supports the following list of global math function: + * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, + * - internal::abs, internal::sqrt, numext::pow, internal::exp, internal::log, internal::sin, internal::cos, + * - internal::conj, internal::real, internal::imag, numext::abs2. + * + * AutoDiffScalar can be used as the scalar type of an Eigen::Matrix object. However, + * in that case, the expression template mechanism only occurs at the top Matrix level, + * while derivatives are computed right away. + * + */ + +template +class AutoDiffScalar + : public internal::auto_diff_special_op + ::type>::Scalar, + typename NumTraits::type>::Scalar>::Real>::value> +{ + public: + typedef internal::auto_diff_special_op + ::type>::Scalar, + typename NumTraits::type>::Scalar>::Real>::value> Base; + typedef typename internal::remove_all::type DerType; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real Real; + + using Base::operator+; + using Base::operator*; + + /** Default constructor without any initialization. 
*/ + AutoDiffScalar() {} + + /** Constructs an active scalar from its \a value, + and initializes the \a nbDer derivatives such that it corresponds to the \a derNumber -th variable */ + AutoDiffScalar(const Scalar& value, int nbDer, int derNumber) + : m_value(value), m_derivatives(DerType::Zero(nbDer)) + { + m_derivatives.coeffRef(derNumber) = Scalar(1); + } + + /** Conversion from a scalar constant to an active scalar. + * The derivatives are set to zero. */ + /*explicit*/ AutoDiffScalar(const Real& value) + : m_value(value) + { + if(m_derivatives.size()>0) + m_derivatives.setZero(); + } + + /** Constructs an active scalar from its \a value and derivatives \a der */ + AutoDiffScalar(const Scalar& value, const DerType& der) + : m_value(value), m_derivatives(der) + {} + + template + AutoDiffScalar(const AutoDiffScalar& other +#ifndef EIGEN_PARSED_BY_DOXYGEN + , typename internal::enable_if< + internal::is_same::type>::Scalar>::value + && internal::is_convertible::value , void*>::type = 0 +#endif + ) + : m_value(other.value()), m_derivatives(other.derivatives()) + {} + + friend std::ostream & operator << (std::ostream & s, const AutoDiffScalar& a) + { + return s << a.value(); + } + + AutoDiffScalar(const AutoDiffScalar& other) + : m_value(other.value()), m_derivatives(other.derivatives()) + {} + + template + inline AutoDiffScalar& operator=(const AutoDiffScalar& other) + { + m_value = other.value(); + m_derivatives = other.derivatives(); + return *this; + } + + inline AutoDiffScalar& operator=(const AutoDiffScalar& other) + { + m_value = other.value(); + m_derivatives = other.derivatives(); + return *this; + } + + inline AutoDiffScalar& operator=(const Scalar& other) + { + m_value = other; + if(m_derivatives.size()>0) + m_derivatives.setZero(); + return *this; + } + +// inline operator const Scalar& () const { return m_value; } +// inline operator Scalar& () { return m_value; } + + inline const Scalar& value() const { return m_value; } + inline Scalar& value() { return m_value; } + + inline const DerType& derivatives() const { return m_derivatives; } + inline DerType& derivatives() { return m_derivatives; } + + inline bool operator< (const Scalar& other) const { return m_value < other; } + inline bool operator<=(const Scalar& other) const { return m_value <= other; } + inline bool operator> (const Scalar& other) const { return m_value > other; } + inline bool operator>=(const Scalar& other) const { return m_value >= other; } + inline bool operator==(const Scalar& other) const { return m_value == other; } + inline bool operator!=(const Scalar& other) const { return m_value != other; } + + friend inline bool operator< (const Scalar& a, const AutoDiffScalar& b) { return a < b.value(); } + friend inline bool operator<=(const Scalar& a, const AutoDiffScalar& b) { return a <= b.value(); } + friend inline bool operator> (const Scalar& a, const AutoDiffScalar& b) { return a > b.value(); } + friend inline bool operator>=(const Scalar& a, const AutoDiffScalar& b) { return a >= b.value(); } + friend inline bool operator==(const Scalar& a, const AutoDiffScalar& b) { return a == b.value(); } + friend inline bool operator!=(const Scalar& a, const AutoDiffScalar& b) { return a != b.value(); } + + template inline bool operator< (const AutoDiffScalar& b) const { return m_value < b.value(); } + template inline bool operator<=(const AutoDiffScalar& b) const { return m_value <= b.value(); } + template inline bool operator> (const AutoDiffScalar& b) const { return m_value > b.value(); } + template inline bool 
operator>=(const AutoDiffScalar& b) const { return m_value >= b.value(); } + template inline bool operator==(const AutoDiffScalar& b) const { return m_value == b.value(); } + template inline bool operator!=(const AutoDiffScalar& b) const { return m_value != b.value(); } + + inline const AutoDiffScalar operator+(const Scalar& other) const + { + return AutoDiffScalar(m_value + other, m_derivatives); + } + + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + { + return AutoDiffScalar(a + b.value(), b.derivatives()); + } + +// inline const AutoDiffScalar operator+(const Real& other) const +// { +// return AutoDiffScalar(m_value + other, m_derivatives); +// } + +// friend inline const AutoDiffScalar operator+(const Real& a, const AutoDiffScalar& b) +// { +// return AutoDiffScalar(a + b.value(), b.derivatives()); +// } + + inline AutoDiffScalar& operator+=(const Scalar& other) + { + value() += other; + return *this; + } + + template + inline const AutoDiffScalar,const DerType,const typename internal::remove_all::type> > + operator+(const AutoDiffScalar& other) const + { + internal::make_coherent(m_derivatives, other.derivatives()); + return AutoDiffScalar,const DerType,const typename internal::remove_all::type> >( + m_value + other.value(), + m_derivatives + other.derivatives()); + } + + template + inline AutoDiffScalar& + operator+=(const AutoDiffScalar& other) + { + (*this) = (*this) + other; + return *this; + } + + inline const AutoDiffScalar operator-(const Scalar& b) const + { + return AutoDiffScalar(m_value - b, m_derivatives); + } + + friend inline const AutoDiffScalar, const DerType> > + operator-(const Scalar& a, const AutoDiffScalar& b) + { + return AutoDiffScalar, const DerType> > + (a - b.value(), -b.derivatives()); + } + + inline AutoDiffScalar& operator-=(const Scalar& other) + { + value() -= other; + return *this; + } + + template + inline const AutoDiffScalar, const DerType,const typename internal::remove_all::type> > + operator-(const AutoDiffScalar& other) const + { + internal::make_coherent(m_derivatives, other.derivatives()); + return AutoDiffScalar, const DerType,const typename internal::remove_all::type> >( + m_value - other.value(), + m_derivatives - other.derivatives()); + } + + template + inline AutoDiffScalar& + operator-=(const AutoDiffScalar& other) + { + *this = *this - other; + return *this; + } + + inline const AutoDiffScalar, const DerType> > + operator-() const + { + return AutoDiffScalar, const DerType> >( + -m_value, + -m_derivatives); + } + + inline const AutoDiffScalar + operator*(const Scalar& other) const + { + return MakeAutoDiffScalar(m_value * other, m_derivatives * other); + } + + friend inline const AutoDiffScalar + operator*(const Scalar& other, const AutoDiffScalar& a) + { + return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other); + } + +// inline const AutoDiffScalar, DerType>::Type > +// operator*(const Real& other) const +// { +// return AutoDiffScalar, DerType>::Type >( +// m_value * other, +// (m_derivatives * other)); +// } +// +// friend inline const AutoDiffScalar, DerType>::Type > +// operator*(const Real& other, const AutoDiffScalar& a) +// { +// return AutoDiffScalar, DerType>::Type >( +// a.value() * other, +// a.derivatives() * other); +// } + + inline const AutoDiffScalar + operator/(const Scalar& other) const + { + return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other))); + } + + friend inline const AutoDiffScalar + operator/(const Scalar& other, const 
AutoDiffScalar& a) + { + return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); + } + +// inline const AutoDiffScalar, DerType>::Type > +// operator/(const Real& other) const +// { +// return AutoDiffScalar, DerType>::Type >( +// m_value / other, +// (m_derivatives * (Real(1)/other))); +// } +// +// friend inline const AutoDiffScalar, DerType>::Type > +// operator/(const Real& other, const AutoDiffScalar& a) +// { +// return AutoDiffScalar, DerType>::Type >( +// other / a.value(), +// a.derivatives() * (-Real(1)/other)); +// } + + template + inline const AutoDiffScalar EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) >,Scalar,product) > + operator/(const AutoDiffScalar& other) const + { + internal::make_coherent(m_derivatives, other.derivatives()); + return MakeAutoDiffScalar( + m_value / other.value(), + ((m_derivatives * other.value()) - (other.derivatives() * m_value)) + * (Scalar(1)/(other.value()*other.value()))); + } + + template + inline const AutoDiffScalar, + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) > > + operator*(const AutoDiffScalar& other) const + { + internal::make_coherent(m_derivatives, other.derivatives()); + return MakeAutoDiffScalar( + m_value * other.value(), + (m_derivatives * other.value()) + (other.derivatives() * m_value)); + } + + inline AutoDiffScalar& operator*=(const Scalar& other) + { + *this = *this * other; + return *this; + } + + template + inline AutoDiffScalar& operator*=(const AutoDiffScalar& other) + { + *this = *this * other; + return *this; + } + + inline AutoDiffScalar& operator/=(const Scalar& other) + { + *this = *this / other; + return *this; + } + + template + inline AutoDiffScalar& operator/=(const AutoDiffScalar& other) + { + *this = *this / other; + return *this; + } + + protected: + Scalar m_value; + DerType m_derivatives; + +}; + +namespace internal { + +template +struct auto_diff_special_op +// : auto_diff_scalar_op::Real, +// is_same::Real>::value> +{ + typedef typename remove_all::type DerType; + typedef typename traits::Scalar Scalar; + typedef typename NumTraits::Real Real; + +// typedef auto_diff_scalar_op::Real, +// is_same::Real>::value> Base; + +// using Base::operator+; +// using Base::operator+=; +// using Base::operator-; +// using Base::operator-=; +// using Base::operator*; +// using Base::operator*=; + + const AutoDiffScalar& derived() const { return *static_cast*>(this); } + AutoDiffScalar& derived() { return *static_cast*>(this); } + + + inline const AutoDiffScalar operator+(const Real& other) const + { + return AutoDiffScalar(derived().value() + other, derived().derivatives()); + } + + friend inline const AutoDiffScalar operator+(const Real& a, const AutoDiffScalar& b) + { + return AutoDiffScalar(a + b.value(), b.derivatives()); + } + + inline AutoDiffScalar& operator+=(const Real& other) + { + derived().value() += other; + return derived(); + } + + + inline const AutoDiffScalar >, DerType>::Type > + operator*(const Real& other) const + { + return AutoDiffScalar >, DerType>::Type >( + derived().value() * other, + derived().derivatives() * other); + } + + friend inline const AutoDiffScalar >, DerType>::Type > + operator*(const Real& other, const AutoDiffScalar& a) + { + return AutoDiffScalar >, 
DerType>::Type >( + a.value() * other, + a.derivatives() * other); + } + + inline AutoDiffScalar& operator*=(const Scalar& other) + { + *this = *this * other; + return derived(); + } +}; + +template +struct auto_diff_special_op +{ + void operator*() const; + void operator-() const; + void operator+() const; +}; + +template +void make_coherent_expression(CwiseBinaryOp xpr, const RefType &ref) +{ + make_coherent(xpr.const_cast_derived().lhs(), ref); + make_coherent(xpr.const_cast_derived().rhs(), ref); +} + +template +void make_coherent_expression(const CwiseUnaryOp &xpr, const RefType &ref) +{ + make_coherent(xpr.nestedExpression().const_cast_derived(), ref); +} + +// needed for compilation only +template +void make_coherent_expression(const CwiseNullaryOp &, const RefType &) +{} + +template +struct make_coherent_impl, B> { + typedef Matrix A; + static void run(A& a, B& b) { + if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) + { + a.resize(b.size()); + a.setZero(); + } + else if (B::SizeAtCompileTime==Dynamic && a.size()!=0 && b.size()==0) + { + make_coherent_expression(b,a); + } + } +}; + +template +struct make_coherent_impl > { + typedef Matrix B; + static void run(A& a, B& b) { + if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) + { + b.resize(a.size()); + b.setZero(); + } + else if (A::SizeAtCompileTime==Dynamic && b.size()!=0 && a.size()==0) + { + make_coherent_expression(a,b); + } + } +}; + +template +struct make_coherent_impl, + Matrix > { + typedef Matrix A; + typedef Matrix B; + static void run(A& a, B& b) { + if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) + { + a.resize(b.size()); + a.setZero(); + } + else if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) + { + b.resize(a.size()); + b.setZero(); + } + } +}; + +} // end namespace internal + +template +struct ScalarBinaryOpTraits,typename DerType::Scalar,BinOp> +{ + typedef AutoDiffScalar ReturnType; +}; + +template +struct ScalarBinaryOpTraits, BinOp> +{ + typedef AutoDiffScalar ReturnType; +}; + + +// The following is an attempt to let Eigen's known about expression template, but that's more tricky! + +// template +// struct ScalarBinaryOpTraits,AutoDiffScalar, BinOp> +// { +// enum { Defined = 1 }; +// typedef AutoDiffScalar ReturnType; +// }; +// +// template +// struct ScalarBinaryOpTraits,AutoDiffScalar, BinOp> +// { +// enum { Defined = 1 };//internal::is_same::value }; +// typedef AutoDiffScalar ReturnType; +// }; + +#define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ + template \ + inline const Eigen::AutoDiffScalar< \ + EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all::type, typename Eigen::internal::traits::type>::Scalar, product) > \ + FUNC(const Eigen::AutoDiffScalar& x) { \ + using namespace Eigen; \ + typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ + EIGEN_UNUSED_VARIABLE(sizeof(Scalar)); \ + CODE; \ + } + +template +struct CleanedUpDerType { + typedef AutoDiffScalar::type::PlainObject> type; +}; + +template +inline const AutoDiffScalar& conj(const AutoDiffScalar& x) { return x; } +template +inline const AutoDiffScalar& real(const AutoDiffScalar& x) { return x; } +template +inline typename DerType::Scalar imag(const AutoDiffScalar&) { return 0.; } +template +inline typename CleanedUpDerType::type (min)(const AutoDiffScalar& x, const T& y) { + typedef typename CleanedUpDerType::type ADS; + return (x <= y ? 
ADS(x) : ADS(y)); +} +template +inline typename CleanedUpDerType::type (max)(const AutoDiffScalar& x, const T& y) { + typedef typename CleanedUpDerType::type ADS; + return (x >= y ? ADS(x) : ADS(y)); +} +template +inline typename CleanedUpDerType::type (min)(const T& x, const AutoDiffScalar& y) { + typedef typename CleanedUpDerType::type ADS; + return (x < y ? ADS(x) : ADS(y)); +} +template +inline typename CleanedUpDerType::type (max)(const T& x, const AutoDiffScalar& y) { + typedef typename CleanedUpDerType::type ADS; + return (x > y ? ADS(x) : ADS(y)); +} +template +inline typename CleanedUpDerType::type (min)(const AutoDiffScalar& x, const AutoDiffScalar& y) { + return (x.value() < y.value() ? x : y); +} +template +inline typename CleanedUpDerType::type (max)(const AutoDiffScalar& x, const AutoDiffScalar& y) { + return (x.value() >= y.value() ? x : y); +} + + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, + using std::abs; + return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2, + using numext::abs2; + return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, + using std::sqrt; + Scalar sqrtx = sqrt(x.value()); + return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, + using std::cos; + using std::sin; + return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, + using std::sin; + using std::cos; + return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, + using std::exp; + Scalar expx = exp(x.value()); + return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, + using std::log; + return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) + +template +inline const Eigen::AutoDiffScalar< +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,typename internal::traits::type>::Scalar,product) > +pow(const Eigen::AutoDiffScalar &x, const typename internal::traits::type>::Scalar &y) +{ + using namespace Eigen; + using std::pow; + return Eigen::MakeAutoDiffScalar(pow(x.value(),y), x.derivatives() * (y * pow(x.value(),y-1))); +} + + +template +inline const AutoDiffScalar::type>::Scalar,Dynamic,1> > +atan2(const AutoDiffScalar& a, const AutoDiffScalar& b) +{ + using std::atan2; + typedef typename internal::traits::type>::Scalar Scalar; + typedef AutoDiffScalar > PlainADS; + PlainADS ret; + ret.value() = atan2(a.value(), b.value()); + + Scalar squared_hypot = a.value() * a.value() + b.value() * b.value(); + + // if (squared_hypot==0) the derivation is undefined and the following results in a NaN: + ret.derivatives() = (a.derivatives() * b.value() - a.value() * b.derivatives()) / squared_hypot; + + return ret; +} + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan, + using std::tan; + using std::cos; + return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin, + using std::sqrt; + using std::asin; + return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, + using std::sqrt; + using std::acos; + return 
Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tanh, + using std::cosh; + using std::tanh; + return Eigen::MakeAutoDiffScalar(tanh(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cosh(x.value()))));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sinh, + using std::sinh; + using std::cosh; + return Eigen::MakeAutoDiffScalar(sinh(x.value()),x.derivatives() * cosh(x.value()));) + +EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cosh, + using std::sinh; + using std::cosh; + return Eigen::MakeAutoDiffScalar(cosh(x.value()),x.derivatives() * sinh(x.value()));) + +#undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY + +template struct NumTraits > + : NumTraits< typename NumTraits::type::Scalar>::Real > +{ + typedef typename internal::remove_all::type DerTypeCleaned; + typedef AutoDiffScalar::Real,DerTypeCleaned::RowsAtCompileTime,DerTypeCleaned::ColsAtCompileTime, + 0, DerTypeCleaned::MaxRowsAtCompileTime, DerTypeCleaned::MaxColsAtCompileTime> > Real; + typedef AutoDiffScalar NonInteger; + typedef AutoDiffScalar Nested; + typedef typename NumTraits::Literal Literal; + enum{ + RequireInitialization = 1 + }; +}; + +} + +namespace std { + +template +class numeric_limits > + : public numeric_limits {}; + +template +class numeric_limits > + : public numeric_limits {}; + +} // namespace std + +#endif // EIGEN_AUTODIFF_SCALAR_H diff --git a/external/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h b/external/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h new file mode 100644 index 0000000..8c2d048 --- /dev/null +++ b/external/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h @@ -0,0 +1,220 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_AUTODIFF_VECTOR_H +#define EIGEN_AUTODIFF_VECTOR_H + +namespace Eigen { + +/* \class AutoDiffScalar + * \brief A scalar type replacement with automatic differentation capability + * + * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f) + * + * This class represents a scalar value while tracking its respective derivatives. + * + * It supports the following list of global math function: + * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, + * - internal::abs, internal::sqrt, numext::pow, internal::exp, internal::log, internal::sin, internal::cos, + * - internal::conj, internal::real, internal::imag, numext::abs2. + * + * AutoDiffScalar can be used as the scalar type of an Eigen::Matrix object. However, + * in that case, the expression template mechanism only occurs at the top Matrix level, + * while derivatives are computed right away. 
+ * + */ +template +class AutoDiffVector +{ + public: + //typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Scalar BaseScalar; + typedef AutoDiffScalar > ActiveScalar; + typedef ActiveScalar Scalar; + typedef AutoDiffScalar CoeffType; + typedef typename JacobianType::Index Index; + + inline AutoDiffVector() {} + + inline AutoDiffVector(const ValueType& values) + : m_values(values) + { + m_jacobian.setZero(); + } + + + CoeffType operator[] (Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType operator[] (Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + CoeffType operator() (Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType operator() (Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + CoeffType coeffRef(Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType coeffRef(Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + Index size() const { return m_values.size(); } + + // FIXME here we could return an expression of the sum + Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); } + + + inline AutoDiffVector(const ValueType& values, const JacobianType& jac) + : m_values(values), m_jacobian(jac) + {} + + template + inline AutoDiffVector(const AutoDiffVector& other) + : m_values(other.values()), m_jacobian(other.jacobian()) + {} + + inline AutoDiffVector(const AutoDiffVector& other) + : m_values(other.values()), m_jacobian(other.jacobian()) + {} + + template + inline AutoDiffVector& operator=(const AutoDiffVector& other) + { + m_values = other.values(); + m_jacobian = other.jacobian(); + return *this; + } + + inline AutoDiffVector& operator=(const AutoDiffVector& other) + { + m_values = other.values(); + m_jacobian = other.jacobian(); + return *this; + } + + inline const ValueType& values() const { return m_values; } + inline ValueType& values() { return m_values; } + + inline const JacobianType& jacobian() const { return m_jacobian; } + inline JacobianType& jacobian() { return m_jacobian; } + + template + inline const AutoDiffVector< + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type > + operator+(const AutoDiffVector& other) const + { + return AutoDiffVector< + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type >( + m_values + other.values(), + m_jacobian + other.jacobian()); + } + + template + inline AutoDiffVector& + operator+=(const AutoDiffVector& other) + { + m_values += other.values(); + m_jacobian += other.jacobian(); + return *this; + } + + template + inline const AutoDiffVector< + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type > + operator-(const AutoDiffVector& other) const + { + return AutoDiffVector< + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type >( + m_values - other.values(), + m_jacobian - other.jacobian()); + } + + template + inline AutoDiffVector& + operator-=(const AutoDiffVector& other) + { + m_values -= other.values(); + m_jacobian -= other.jacobian(); + return *this; + } + + inline const AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename 
MakeCwiseUnaryOp, JacobianType>::Type > + operator-() const + { + return AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( + -m_values, + -m_jacobian); + } + + inline const AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type> + operator*(const BaseScalar& other) const + { + return AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( + m_values * other, + m_jacobian * other); + } + + friend inline const AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type > + operator*(const Scalar& other, const AutoDiffVector& v) + { + return AutoDiffVector< + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( + v.values() * other, + v.jacobian() * other); + } + +// template +// inline const AutoDiffVector< +// CwiseBinaryOp, ValueType, OtherValueType> +// CwiseBinaryOp, +// CwiseUnaryOp, JacobianType>, +// CwiseUnaryOp, OtherJacobianType> > > +// operator*(const AutoDiffVector& other) const +// { +// return AutoDiffVector< +// CwiseBinaryOp, ValueType, OtherValueType> +// CwiseBinaryOp, +// CwiseUnaryOp, JacobianType>, +// CwiseUnaryOp, OtherJacobianType> > >( +// m_values.cwise() * other.values(), +// (m_jacobian * other.values()) + (m_values * other.jacobian())); +// } + + inline AutoDiffVector& operator*=(const Scalar& other) + { + m_values *= other; + m_jacobian *= other; + return *this; + } + + template + inline AutoDiffVector& operator*=(const AutoDiffVector& other) + { + *this = *this * other; + return *this; + } + + protected: + ValueType m_values; + JacobianType m_jacobian; + +}; + +} + +#endif // EIGEN_AUTODIFF_VECTOR_H diff --git a/external/unsupported/Eigen/src/BVH/BVAlgorithms.h b/external/unsupported/Eigen/src/BVH/BVAlgorithms.h new file mode 100644 index 0000000..994c8af --- /dev/null +++ b/external/unsupported/Eigen/src/BVH/BVAlgorithms.h @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Ilya Baran +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BVALGORITHMS_H +#define EIGEN_BVALGORITHMS_H + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +bool intersect_helper(const BVH &tree, Intersector &intersector, typename BVH::Index root) +{ + typedef typename BVH::Index Index; + typedef typename BVH::VolumeIterator VolIter; + typedef typename BVH::ObjectIterator ObjIter; + + VolIter vBegin = VolIter(), vEnd = VolIter(); + ObjIter oBegin = ObjIter(), oEnd = ObjIter(); + + std::vector todo(1, root); + + while(!todo.empty()) { + tree.getChildren(todo.back(), vBegin, vEnd, oBegin, oEnd); + todo.pop_back(); + + for(; vBegin != vEnd; ++vBegin) //go through child volumes + if(intersector.intersectVolume(tree.getVolume(*vBegin))) + todo.push_back(*vBegin); + + for(; oBegin != oEnd; ++oBegin) //go through child objects + if(intersector.intersectObject(*oBegin)) + return true; //intersector said to stop query + } + return false; +} +#endif //not EIGEN_PARSED_BY_DOXYGEN + +template +struct intersector_helper1 +{ + intersector_helper1(const Object2 &inStored, Intersector &in) : stored(inStored), intersector(in) {} + bool intersectVolume(const Volume1 &vol) { return intersector.intersectVolumeObject(vol, stored); } + bool intersectObject(const Object1 &obj) { return intersector.intersectObjectObject(obj, stored); } + Object2 stored; + Intersector &intersector; +private: + intersector_helper1& operator=(const intersector_helper1&); +}; + +template +struct intersector_helper2 +{ + intersector_helper2(const Object1 &inStored, Intersector &in) : stored(inStored), intersector(in) {} + bool intersectVolume(const Volume2 &vol) { return intersector.intersectObjectVolume(stored, vol); } + bool intersectObject(const Object2 &obj) { return intersector.intersectObjectObject(stored, obj); } + Object1 stored; + Intersector &intersector; +private: + intersector_helper2& operator=(const intersector_helper2&); +}; + +} // end namespace internal + +/** Given a BVH, runs the query encapsulated by \a intersector. + * The Intersector type must provide the following members: \code + bool intersectVolume(const BVH::Volume &volume) //returns true if volume intersects the query + bool intersectObject(const BVH::Object &object) //returns true if the search should terminate immediately + \endcode + */ +template +void BVIntersect(const BVH &tree, Intersector &intersector) +{ + internal::intersect_helper(tree, intersector, tree.getRootIndex()); +} + +/** Given two BVH's, runs the query on their Cartesian product encapsulated by \a intersector. 
+ * The Intersector type must provide the following members: \code + bool intersectVolumeVolume(const BVH1::Volume &v1, const BVH2::Volume &v2) //returns true if product of volumes intersects the query + bool intersectVolumeObject(const BVH1::Volume &v1, const BVH2::Object &o2) //returns true if the volume-object product intersects the query + bool intersectObjectVolume(const BVH1::Object &o1, const BVH2::Volume &v2) //returns true if the volume-object product intersects the query + bool intersectObjectObject(const BVH1::Object &o1, const BVH2::Object &o2) //returns true if the search should terminate immediately + \endcode + */ +template +void BVIntersect(const BVH1 &tree1, const BVH2 &tree2, Intersector &intersector) //TODO: tandem descent when it makes sense +{ + typedef typename BVH1::Index Index1; + typedef typename BVH2::Index Index2; + typedef internal::intersector_helper1 Helper1; + typedef internal::intersector_helper2 Helper2; + typedef typename BVH1::VolumeIterator VolIter1; + typedef typename BVH1::ObjectIterator ObjIter1; + typedef typename BVH2::VolumeIterator VolIter2; + typedef typename BVH2::ObjectIterator ObjIter2; + + VolIter1 vBegin1 = VolIter1(), vEnd1 = VolIter1(); + ObjIter1 oBegin1 = ObjIter1(), oEnd1 = ObjIter1(); + VolIter2 vBegin2 = VolIter2(), vEnd2 = VolIter2(), vCur2 = VolIter2(); + ObjIter2 oBegin2 = ObjIter2(), oEnd2 = ObjIter2(), oCur2 = ObjIter2(); + + std::vector > todo(1, std::make_pair(tree1.getRootIndex(), tree2.getRootIndex())); + + while(!todo.empty()) { + tree1.getChildren(todo.back().first, vBegin1, vEnd1, oBegin1, oEnd1); + tree2.getChildren(todo.back().second, vBegin2, vEnd2, oBegin2, oEnd2); + todo.pop_back(); + + for(; vBegin1 != vEnd1; ++vBegin1) { //go through child volumes of first tree + const typename BVH1::Volume &vol1 = tree1.getVolume(*vBegin1); + for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree + if(intersector.intersectVolumeVolume(vol1, tree2.getVolume(*vCur2))) + todo.push_back(std::make_pair(*vBegin1, *vCur2)); + } + + for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree + Helper1 helper(*oCur2, intersector); + if(internal::intersect_helper(tree1, helper, *vBegin1)) + return; //intersector said to stop query + } + } + + for(; oBegin1 != oEnd1; ++oBegin1) { //go through child objects of first tree + for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree + Helper2 helper(*oBegin1, intersector); + if(internal::intersect_helper(tree2, helper, *vCur2)) + return; //intersector said to stop query + } + + for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree + if(intersector.intersectObjectObject(*oBegin1, *oCur2)) + return; //intersector said to stop query + } + } + } +} + +namespace internal { + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +typename Minimizer::Scalar minimize_helper(const BVH &tree, Minimizer &minimizer, typename BVH::Index root, typename Minimizer::Scalar minimum) +{ + typedef typename Minimizer::Scalar Scalar; + typedef typename BVH::Index Index; + typedef std::pair QueueElement; //first element is priority + typedef typename BVH::VolumeIterator VolIter; + typedef typename BVH::ObjectIterator ObjIter; + + VolIter vBegin = VolIter(), vEnd = VolIter(); + ObjIter oBegin = ObjIter(), oEnd = ObjIter(); + std::priority_queue, std::greater > todo; //smallest is at the top + + todo.push(std::make_pair(Scalar(), root)); + + while(!todo.empty()) { + tree.getChildren(todo.top().second, 
vBegin, vEnd, oBegin, oEnd); + todo.pop(); + + for(; oBegin != oEnd; ++oBegin) //go through child objects + minimum = (std::min)(minimum, minimizer.minimumOnObject(*oBegin)); + + for(; vBegin != vEnd; ++vBegin) { //go through child volumes + Scalar val = minimizer.minimumOnVolume(tree.getVolume(*vBegin)); + if(val < minimum) + todo.push(std::make_pair(val, *vBegin)); + } + } + + return minimum; +} +#endif //not EIGEN_PARSED_BY_DOXYGEN + + +template +struct minimizer_helper1 +{ + typedef typename Minimizer::Scalar Scalar; + minimizer_helper1(const Object2 &inStored, Minimizer &m) : stored(inStored), minimizer(m) {} + Scalar minimumOnVolume(const Volume1 &vol) { return minimizer.minimumOnVolumeObject(vol, stored); } + Scalar minimumOnObject(const Object1 &obj) { return minimizer.minimumOnObjectObject(obj, stored); } + Object2 stored; + Minimizer &minimizer; +private: + minimizer_helper1& operator=(const minimizer_helper1&); +}; + +template +struct minimizer_helper2 +{ + typedef typename Minimizer::Scalar Scalar; + minimizer_helper2(const Object1 &inStored, Minimizer &m) : stored(inStored), minimizer(m) {} + Scalar minimumOnVolume(const Volume2 &vol) { return minimizer.minimumOnObjectVolume(stored, vol); } + Scalar minimumOnObject(const Object2 &obj) { return minimizer.minimumOnObjectObject(stored, obj); } + Object1 stored; + Minimizer &minimizer; +private: + minimizer_helper2& operator=(const minimizer_helper2&); +}; + +} // end namespace internal + +/** Given a BVH, runs the query encapsulated by \a minimizer. + * \returns the minimum value. + * The Minimizer type must provide the following members: \code + typedef Scalar //the numeric type of what is being minimized--not necessarily the Scalar type of the BVH (if it has one) + Scalar minimumOnVolume(const BVH::Volume &volume) + Scalar minimumOnObject(const BVH::Object &object) + \endcode + */ +template +typename Minimizer::Scalar BVMinimize(const BVH &tree, Minimizer &minimizer) +{ + return internal::minimize_helper(tree, minimizer, tree.getRootIndex(), (std::numeric_limits::max)()); +} + +/** Given two BVH's, runs the query on their cartesian product encapsulated by \a minimizer. + * \returns the minimum value. 
+ * The Minimizer type must provide the following members: \code + typedef Scalar //the numeric type of what is being minimized--not necessarily the Scalar type of the BVH (if it has one) + Scalar minimumOnVolumeVolume(const BVH1::Volume &v1, const BVH2::Volume &v2) + Scalar minimumOnVolumeObject(const BVH1::Volume &v1, const BVH2::Object &o2) + Scalar minimumOnObjectVolume(const BVH1::Object &o1, const BVH2::Volume &v2) + Scalar minimumOnObjectObject(const BVH1::Object &o1, const BVH2::Object &o2) + \endcode + */ +template +typename Minimizer::Scalar BVMinimize(const BVH1 &tree1, const BVH2 &tree2, Minimizer &minimizer) +{ + typedef typename Minimizer::Scalar Scalar; + typedef typename BVH1::Index Index1; + typedef typename BVH2::Index Index2; + typedef internal::minimizer_helper1 Helper1; + typedef internal::minimizer_helper2 Helper2; + typedef std::pair > QueueElement; //first element is priority + typedef typename BVH1::VolumeIterator VolIter1; + typedef typename BVH1::ObjectIterator ObjIter1; + typedef typename BVH2::VolumeIterator VolIter2; + typedef typename BVH2::ObjectIterator ObjIter2; + + VolIter1 vBegin1 = VolIter1(), vEnd1 = VolIter1(); + ObjIter1 oBegin1 = ObjIter1(), oEnd1 = ObjIter1(); + VolIter2 vBegin2 = VolIter2(), vEnd2 = VolIter2(), vCur2 = VolIter2(); + ObjIter2 oBegin2 = ObjIter2(), oEnd2 = ObjIter2(), oCur2 = ObjIter2(); + std::priority_queue, std::greater > todo; //smallest is at the top + + Scalar minimum = (std::numeric_limits::max)(); + todo.push(std::make_pair(Scalar(), std::make_pair(tree1.getRootIndex(), tree2.getRootIndex()))); + + while(!todo.empty()) { + tree1.getChildren(todo.top().second.first, vBegin1, vEnd1, oBegin1, oEnd1); + tree2.getChildren(todo.top().second.second, vBegin2, vEnd2, oBegin2, oEnd2); + todo.pop(); + + for(; oBegin1 != oEnd1; ++oBegin1) { //go through child objects of first tree + for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree + minimum = (std::min)(minimum, minimizer.minimumOnObjectObject(*oBegin1, *oCur2)); + } + + for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree + Helper2 helper(*oBegin1, minimizer); + minimum = (std::min)(minimum, internal::minimize_helper(tree2, helper, *vCur2, minimum)); + } + } + + for(; vBegin1 != vEnd1; ++vBegin1) { //go through child volumes of first tree + const typename BVH1::Volume &vol1 = tree1.getVolume(*vBegin1); + + for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree + Helper1 helper(*oCur2, minimizer); + minimum = (std::min)(minimum, internal::minimize_helper(tree1, helper, *vBegin1, minimum)); + } + + for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree + Scalar val = minimizer.minimumOnVolumeVolume(vol1, tree2.getVolume(*vCur2)); + if(val < minimum) + todo.push(std::make_pair(val, std::make_pair(*vBegin1, *vCur2))); + } + } + } + return minimum; +} + +} // end namespace Eigen + +#endif // EIGEN_BVALGORITHMS_H diff --git a/external/unsupported/Eigen/src/BVH/KdBVH.h b/external/unsupported/Eigen/src/BVH/KdBVH.h new file mode 100644 index 0000000..2d5b76a --- /dev/null +++ b/external/unsupported/Eigen/src/BVH/KdBVH.h @@ -0,0 +1,223 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Ilya Baran +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef KDBVH_H_INCLUDED +#define KDBVH_H_INCLUDED + +namespace Eigen { + +namespace internal { + +//internal pair class for the BVH--used instead of std::pair because of alignment +template +struct vector_int_pair +{ +EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Dim) + typedef Matrix VectorType; + + vector_int_pair(const VectorType &v, int i) : first(v), second(i) {} + + VectorType first; + int second; +}; + +//these templates help the tree initializer get the bounding boxes either from a provided +//iterator range or using bounding_box in a unified way +template +struct get_boxes_helper { + void operator()(const ObjectList &objects, BoxIter boxBegin, BoxIter boxEnd, VolumeList &outBoxes) + { + outBoxes.insert(outBoxes.end(), boxBegin, boxEnd); + eigen_assert(outBoxes.size() == objects.size()); + EIGEN_ONLY_USED_FOR_DEBUG(objects); + } +}; + +template +struct get_boxes_helper { + void operator()(const ObjectList &objects, int, int, VolumeList &outBoxes) + { + outBoxes.reserve(objects.size()); + for(int i = 0; i < (int)objects.size(); ++i) + outBoxes.push_back(bounding_box(objects[i])); + } +}; + +} // end namespace internal + + +/** \class KdBVH + * \brief A simple bounding volume hierarchy based on AlignedBox + * + * \param _Scalar The underlying scalar type of the bounding boxes + * \param _Dim The dimension of the space in which the hierarchy lives + * \param _Object The object type that lives in the hierarchy. It must have value semantics. Either bounding_box(_Object) must + * be defined and return an AlignedBox<_Scalar, _Dim> or bounding boxes must be provided to the tree initializer. + * + * This class provides a simple (as opposed to optimized) implementation of a bounding volume hierarchy analogous to a Kd-tree. + * Given a sequence of objects, it computes their bounding boxes, constructs a Kd-tree of their centers + * and builds a BVH with the structure of that Kd-tree. When the elements of the tree are too expensive to be copied around, + * it is useful for _Object to be a pointer. + */ +template class KdBVH +{ +public: + enum { Dim = _Dim }; + typedef _Object Object; + typedef std::vector > ObjectList; + typedef _Scalar Scalar; + typedef AlignedBox Volume; + typedef std::vector > VolumeList; + typedef int Index; + typedef const int *VolumeIterator; //the iterators are just pointers into the tree's vectors + typedef const Object *ObjectIterator; + + KdBVH() {} + + /** Given an iterator range over \a Object references, constructs the BVH. Requires that bounding_box(Object) return a Volume. */ + template KdBVH(Iter begin, Iter end) { init(begin, end, 0, 0); } //int is recognized by init as not being an iterator type + + /** Given an iterator range over \a Object references and an iterator range over their bounding boxes, constructs the BVH */ + template KdBVH(OIter begin, OIter end, BIter boxBegin, BIter boxEnd) { init(begin, end, boxBegin, boxEnd); } + + /** Given an iterator range over \a Object references, constructs the BVH, overwriting whatever is in there currently. + * Requires that bounding_box(Object) return a Volume. */ + template void init(Iter begin, Iter end) { init(begin, end, 0, 0); } + + /** Given an iterator range over \a Object references and an iterator range over their bounding boxes, + * constructs the BVH, overwriting whatever is in there currently. 
*/ + template void init(OIter begin, OIter end, BIter boxBegin, BIter boxEnd) + { + objects.clear(); + boxes.clear(); + children.clear(); + + objects.insert(objects.end(), begin, end); + int n = static_cast(objects.size()); + + if(n < 2) + return; //if we have at most one object, we don't need any internal nodes + + VolumeList objBoxes; + VIPairList objCenters; + + //compute the bounding boxes depending on BIter type + internal::get_boxes_helper()(objects, boxBegin, boxEnd, objBoxes); + + objCenters.reserve(n); + boxes.reserve(n - 1); + children.reserve(2 * n - 2); + + for(int i = 0; i < n; ++i) + objCenters.push_back(VIPair(objBoxes[i].center(), i)); + + build(objCenters, 0, n, objBoxes, 0); //the recursive part of the algorithm + + ObjectList tmp(n); + tmp.swap(objects); + for(int i = 0; i < n; ++i) + objects[i] = tmp[objCenters[i].second]; + } + + /** \returns the index of the root of the hierarchy */ + inline Index getRootIndex() const { return (int)boxes.size() - 1; } + + /** Given an \a index of a node, on exit, \a outVBegin and \a outVEnd range over the indices of the volume children of the node + * and \a outOBegin and \a outOEnd range over the object children of the node */ + EIGEN_STRONG_INLINE void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, + ObjectIterator &outOBegin, ObjectIterator &outOEnd) const + { //inlining this function should open lots of optimization opportunities to the compiler + if(index < 0) { + outVBegin = outVEnd; + if(!objects.empty()) + outOBegin = &(objects[0]); + outOEnd = outOBegin + objects.size(); //output all objects--necessary when the tree has only one object + return; + } + + int numBoxes = static_cast(boxes.size()); + + int idx = index * 2; + if(children[idx + 1] < numBoxes) { //second index is always bigger + outVBegin = &(children[idx]); + outVEnd = outVBegin + 2; + outOBegin = outOEnd; + } + else if(children[idx] >= numBoxes) { //if both children are objects + outVBegin = outVEnd; + outOBegin = &(objects[children[idx] - numBoxes]); + outOEnd = outOBegin + 2; + } else { //if the first child is a volume and the second is an object + outVBegin = &(children[idx]); + outVEnd = outVBegin + 1; + outOBegin = &(objects[children[idx + 1] - numBoxes]); + outOEnd = outOBegin + 1; + } + } + + /** \returns the bounding box of the node at \a index */ + inline const Volume &getVolume(Index index) const + { + return boxes[index]; + } + +private: + typedef internal::vector_int_pair VIPair; + typedef std::vector > VIPairList; + typedef Matrix VectorType; + struct VectorComparator //compares vectors, or more specifically, VIPairs along a particular dimension + { + VectorComparator(int inDim) : dim(inDim) {} + inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; } + int dim; + }; + + //Build the part of the tree between objects[from] and objects[to] (not including objects[to]). + //This routine partitions the objCenters in [from, to) along the dimension dim, recursively constructs + //the two halves, and adds their parent node. 
TODO: a cache-friendlier layout + void build(VIPairList &objCenters, int from, int to, const VolumeList &objBoxes, int dim) + { + eigen_assert(to - from > 1); + if(to - from == 2) { + boxes.push_back(objBoxes[objCenters[from].second].merged(objBoxes[objCenters[from + 1].second])); + children.push_back(from + (int)objects.size() - 1); //there are objects.size() - 1 tree nodes + children.push_back(from + (int)objects.size()); + } + else if(to - from == 3) { + int mid = from + 2; + std::nth_element(objCenters.begin() + from, objCenters.begin() + mid, + objCenters.begin() + to, VectorComparator(dim)); //partition + build(objCenters, from, mid, objBoxes, (dim + 1) % Dim); + int idx1 = (int)boxes.size() - 1; + boxes.push_back(boxes[idx1].merged(objBoxes[objCenters[mid].second])); + children.push_back(idx1); + children.push_back(mid + (int)objects.size() - 1); + } + else { + int mid = from + (to - from) / 2; + nth_element(objCenters.begin() + from, objCenters.begin() + mid, + objCenters.begin() + to, VectorComparator(dim)); //partition + build(objCenters, from, mid, objBoxes, (dim + 1) % Dim); + int idx1 = (int)boxes.size() - 1; + build(objCenters, mid, to, objBoxes, (dim + 1) % Dim); + int idx2 = (int)boxes.size() - 1; + boxes.push_back(boxes[idx1].merged(boxes[idx2])); + children.push_back(idx1); + children.push_back(idx2); + } + } + + std::vector children; //children of x are children[2x] and children[2x+1], indices bigger than boxes.size() index into objects. + VolumeList boxes; + ObjectList objects; +}; + +} // end namespace Eigen + +#endif //KDBVH_H_INCLUDED diff --git a/external/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/external/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h new file mode 100644 index 0000000..0fbd847 --- /dev/null +++ b/external/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h @@ -0,0 +1,790 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 David Harmon +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H +#define EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H + +#include "../../../../Eigen/Dense" + +namespace Eigen { + +namespace internal { + template struct arpack_wrapper; + template struct OP; +} + + + +template, bool BisSPD=false> +class ArpackGeneralizedSelfAdjointEigenSolver +{ +public: + //typedef typename MatrixSolver::MatrixType MatrixType; + + /** \brief Scalar type for matrices of type \p MatrixType. */ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + + /** \brief Real scalar type for \p MatrixType. + * + * This is just \c Scalar if #Scalar is real (e.g., \c float or + * \c Scalar), and the type of the real part of \c Scalar if #Scalar is + * complex. + */ + typedef typename NumTraits::Real RealScalar; + + /** \brief Type for vector of eigenvalues as returned by eigenvalues(). + * + * This is a column vector with entries of type #RealScalar. + * The length of the vector is the size of \p nbrEigenvalues. + */ + typedef typename internal::plain_col_type::type RealVectorType; + + /** \brief Default constructor. + * + * The default constructor is for cases in which the user intends to + * perform decompositions via compute(). 
+ * + */ + ArpackGeneralizedSelfAdjointEigenSolver() + : m_eivec(), + m_eivalues(), + m_isInitialized(false), + m_eigenvectorsOk(false), + m_nbrConverged(0), + m_nbrIterations(0) + { } + + /** \brief Constructor; computes generalized eigenvalues of given matrix with respect to another matrix. + * + * \param[in] A Self-adjoint matrix whose eigenvalues / eigenvectors will + * computed. By default, the upper triangular part is used, but can be changed + * through the template parameter. + * \param[in] B Self-adjoint matrix for the generalized eigenvalue problem. + * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. + * Must be less than the size of the input matrix, or an error is returned. + * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with + * respective meanings to find the largest magnitude , smallest magnitude, + * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this + * value can contain floating point value in string form, in which case the + * eigenvalues closest to this value will be found. + * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. + * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which + * means machine precision. + * + * This constructor calls compute(const MatrixType&, const MatrixType&, Index, string, int, RealScalar) + * to compute the eigenvalues of the matrix \p A with respect to \p B. The eigenvectors are computed if + * \p options equals #ComputeEigenvectors. + * + */ + ArpackGeneralizedSelfAdjointEigenSolver(const MatrixType& A, const MatrixType& B, + Index nbrEigenvalues, std::string eigs_sigma="LM", + int options=ComputeEigenvectors, RealScalar tol=0.0) + : m_eivec(), + m_eivalues(), + m_isInitialized(false), + m_eigenvectorsOk(false), + m_nbrConverged(0), + m_nbrIterations(0) + { + compute(A, B, nbrEigenvalues, eigs_sigma, options, tol); + } + + /** \brief Constructor; computes eigenvalues of given matrix. + * + * \param[in] A Self-adjoint matrix whose eigenvalues / eigenvectors will + * computed. By default, the upper triangular part is used, but can be changed + * through the template parameter. + * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. + * Must be less than the size of the input matrix, or an error is returned. + * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with + * respective meanings to find the largest magnitude , smallest magnitude, + * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this + * value can contain floating point value in string form, in which case the + * eigenvalues closest to this value will be found. + * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. + * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which + * means machine precision. + * + * This constructor calls compute(const MatrixType&, Index, string, int, RealScalar) + * to compute the eigenvalues of the matrix \p A. The eigenvectors are computed if + * \p options equals #ComputeEigenvectors. 
+ * + */ + + ArpackGeneralizedSelfAdjointEigenSolver(const MatrixType& A, + Index nbrEigenvalues, std::string eigs_sigma="LM", + int options=ComputeEigenvectors, RealScalar tol=0.0) + : m_eivec(), + m_eivalues(), + m_isInitialized(false), + m_eigenvectorsOk(false), + m_nbrConverged(0), + m_nbrIterations(0) + { + compute(A, nbrEigenvalues, eigs_sigma, options, tol); + } + + + /** \brief Computes generalized eigenvalues / eigenvectors of given matrix using the external ARPACK library. + * + * \param[in] A Selfadjoint matrix whose eigendecomposition is to be computed. + * \param[in] B Selfadjoint matrix for generalized eigenvalues. + * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. + * Must be less than the size of the input matrix, or an error is returned. + * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with + * respective meanings to find the largest magnitude , smallest magnitude, + * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this + * value can contain floating point value in string form, in which case the + * eigenvalues closest to this value will be found. + * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. + * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which + * means machine precision. + * + * \returns Reference to \c *this + * + * This function computes the generalized eigenvalues of \p A with respect to \p B using ARPACK. The eigenvalues() + * function can be used to retrieve them. If \p options equals #ComputeEigenvectors, + * then the eigenvectors are also computed and can be retrieved by + * calling eigenvectors(). + * + */ + ArpackGeneralizedSelfAdjointEigenSolver& compute(const MatrixType& A, const MatrixType& B, + Index nbrEigenvalues, std::string eigs_sigma="LM", + int options=ComputeEigenvectors, RealScalar tol=0.0); + + /** \brief Computes eigenvalues / eigenvectors of given matrix using the external ARPACK library. + * + * \param[in] A Selfadjoint matrix whose eigendecomposition is to be computed. + * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. + * Must be less than the size of the input matrix, or an error is returned. + * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with + * respective meanings to find the largest magnitude , smallest magnitude, + * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this + * value can contain floating point value in string form, in which case the + * eigenvalues closest to this value will be found. + * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. + * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which + * means machine precision. + * + * \returns Reference to \c *this + * + * This function computes the eigenvalues of \p A using ARPACK. The eigenvalues() + * function can be used to retrieve them. If \p options equals #ComputeEigenvectors, + * then the eigenvectors are also computed and can be retrieved by + * calling eigenvectors(). + * + */ + ArpackGeneralizedSelfAdjointEigenSolver& compute(const MatrixType& A, + Index nbrEigenvalues, std::string eigs_sigma="LM", + int options=ComputeEigenvectors, RealScalar tol=0.0); + + + /** \brief Returns the eigenvectors of given matrix. + * + * \returns A const reference to the matrix whose columns are the eigenvectors. + * + * \pre The eigenvectors have been computed before. 
+ * + * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding + * to eigenvalue number \f$ k \f$ as returned by eigenvalues(). The + * eigenvectors are normalized to have (Euclidean) norm equal to one. If + * this object was used to solve the eigenproblem for the selfadjoint + * matrix \f$ A \f$, then the matrix returned by this function is the + * matrix \f$ V \f$ in the eigendecomposition \f$ A V = D V \f$. + * For the generalized eigenproblem, the matrix returned is the solution \f$ A V = D B V \f$ + * + * Example: \include SelfAdjointEigenSolver_eigenvectors.cpp + * Output: \verbinclude SelfAdjointEigenSolver_eigenvectors.out + * + * \sa eigenvalues() + */ + const Matrix& eigenvectors() const + { + eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); + eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); + return m_eivec; + } + + /** \brief Returns the eigenvalues of given matrix. + * + * \returns A const reference to the column vector containing the eigenvalues. + * + * \pre The eigenvalues have been computed before. + * + * The eigenvalues are repeated according to their algebraic multiplicity, + * so there are as many eigenvalues as rows in the matrix. The eigenvalues + * are sorted in increasing order. + * + * Example: \include SelfAdjointEigenSolver_eigenvalues.cpp + * Output: \verbinclude SelfAdjointEigenSolver_eigenvalues.out + * + * \sa eigenvectors(), MatrixBase::eigenvalues() + */ + const Matrix& eigenvalues() const + { + eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); + return m_eivalues; + } + + /** \brief Computes the positive-definite square root of the matrix. + * + * \returns the positive-definite square root of the matrix + * + * \pre The eigenvalues and eigenvectors of a positive-definite matrix + * have been computed before. + * + * The square root of a positive-definite matrix \f$ A \f$ is the + * positive-definite matrix whose square equals \f$ A \f$. This function + * uses the eigendecomposition \f$ A = V D V^{-1} \f$ to compute the + * square root as \f$ A^{1/2} = V D^{1/2} V^{-1} \f$. + * + * Example: \include SelfAdjointEigenSolver_operatorSqrt.cpp + * Output: \verbinclude SelfAdjointEigenSolver_operatorSqrt.out + * + * \sa operatorInverseSqrt(), + * \ref MatrixFunctions_Module "MatrixFunctions Module" + */ + Matrix operatorSqrt() const + { + eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized."); + eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); + return m_eivec * m_eivalues.cwiseSqrt().asDiagonal() * m_eivec.adjoint(); + } + + /** \brief Computes the inverse square root of the matrix. + * + * \returns the inverse positive-definite square root of the matrix + * + * \pre The eigenvalues and eigenvectors of a positive-definite matrix + * have been computed before. + * + * This function uses the eigendecomposition \f$ A = V D V^{-1} \f$ to + * compute the inverse square root as \f$ V D^{-1/2} V^{-1} \f$. This is + * cheaper than first computing the square root with operatorSqrt() and + * then its inverse with MatrixBase::inverse(). 
+ * + * Example: \include SelfAdjointEigenSolver_operatorInverseSqrt.cpp + * Output: \verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out + * + * \sa operatorSqrt(), MatrixBase::inverse(), + * \ref MatrixFunctions_Module "MatrixFunctions Module" + */ + Matrix operatorInverseSqrt() const + { + eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized."); + eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); + return m_eivec * m_eivalues.cwiseInverse().cwiseSqrt().asDiagonal() * m_eivec.adjoint(); + } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was successful, \c NoConvergence otherwise. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); + return m_info; + } + + size_t getNbrConvergedEigenValues() const + { return m_nbrConverged; } + + size_t getNbrIterations() const + { return m_nbrIterations; } + +protected: + Matrix m_eivec; + Matrix m_eivalues; + ComputationInfo m_info; + bool m_isInitialized; + bool m_eigenvectorsOk; + + size_t m_nbrConverged; + size_t m_nbrIterations; +}; + + + + + +template +ArpackGeneralizedSelfAdjointEigenSolver& + ArpackGeneralizedSelfAdjointEigenSolver +::compute(const MatrixType& A, Index nbrEigenvalues, + std::string eigs_sigma, int options, RealScalar tol) +{ + MatrixType B(0,0); + compute(A, B, nbrEigenvalues, eigs_sigma, options, tol); + + return *this; +} + + +template +ArpackGeneralizedSelfAdjointEigenSolver& + ArpackGeneralizedSelfAdjointEigenSolver +::compute(const MatrixType& A, const MatrixType& B, Index nbrEigenvalues, + std::string eigs_sigma, int options, RealScalar tol) +{ + eigen_assert(A.cols() == A.rows()); + eigen_assert(B.cols() == B.rows()); + eigen_assert(B.rows() == 0 || A.cols() == B.rows()); + eigen_assert((options &~ (EigVecMask | GenEigMask)) == 0 + && (options & EigVecMask) != EigVecMask + && "invalid option parameter"); + + bool isBempty = (B.rows() == 0) || (B.cols() == 0); + + // For clarity, all parameters match their ARPACK name + // + // Always 0 on the first call + // + int ido = 0; + + int n = (int)A.cols(); + + // User options: "LA", "SA", "SM", "LM", "BE" + // + char whch[3] = "LM"; + + // Specifies the shift if iparam[6] = { 3, 4, 5 }, not used if iparam[6] = { 1, 2 } + // + RealScalar sigma = 0.0; + + if (eigs_sigma.length() >= 2 && isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1])) + { + eigs_sigma[0] = toupper(eigs_sigma[0]); + eigs_sigma[1] = toupper(eigs_sigma[1]); + + // In the following special case we're going to invert the problem, since solving + // for larger magnitude is much much faster + // i.e., if 'SM' is specified, we're going to really use 'LM', the default + // + if (eigs_sigma.substr(0,2) != "SM") + { + whch[0] = eigs_sigma[0]; + whch[1] = eigs_sigma[1]; + } + } + else + { + eigen_assert(false && "Specifying clustered eigenvalues is not yet supported!"); + + // If it's not scalar values, then the user may be explicitly + // specifying the sigma value to cluster the evs around + // + sigma = atof(eigs_sigma.c_str()); + + // If atof fails, it returns 0.0, which is a fine default + // + } + + // "I" means normal eigenvalue problem, "G" means generalized + // + char bmat[2] = "I"; + if (eigs_sigma.substr(0,2) == "SM" || !(isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1])) || (!isBempty && !BisSPD)) + bmat[0] = 'G'; + + // Now we determine the mode to use + // + int mode = (bmat[0] 
== 'G') + 1; + if (eigs_sigma.substr(0,2) == "SM" || !(isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1]))) + { + // We're going to use shift-and-invert mode, and basically find + // the largest eigenvalues of the inverse operator + // + mode = 3; + } + + // The user-specified number of eigenvalues/vectors to compute + // + int nev = (int)nbrEigenvalues; + + // Allocate space for ARPACK to store the residual + // + Scalar *resid = new Scalar[n]; + + // Number of Lanczos vectors, must satisfy nev < ncv <= n + // Note that this indicates that nev != n, and we cannot compute + // all eigenvalues of a mtrix + // + int ncv = std::min(std::max(2*nev, 20), n); + + // The working n x ncv matrix, also store the final eigenvectors (if computed) + // + Scalar *v = new Scalar[n*ncv]; + int ldv = n; + + // Working space + // + Scalar *workd = new Scalar[3*n]; + int lworkl = ncv*ncv+8*ncv; // Must be at least this length + Scalar *workl = new Scalar[lworkl]; + + int *iparam= new int[11]; + iparam[0] = 1; // 1 means we let ARPACK perform the shifts, 0 means we'd have to do it + iparam[2] = std::max(300, (int)std::ceil(2*n/std::max(ncv,1))); + iparam[6] = mode; // The mode, 1 is standard ev problem, 2 for generalized ev, 3 for shift-and-invert + + // Used during reverse communicate to notify where arrays start + // + int *ipntr = new int[11]; + + // Error codes are returned in here, initial value of 0 indicates a random initial + // residual vector is used, any other values means resid contains the initial residual + // vector, possibly from a previous run + // + int info = 0; + + Scalar scale = 1.0; + //if (!isBempty) + //{ + //Scalar scale = B.norm() / std::sqrt(n); + //scale = std::pow(2, std::floor(std::log(scale+1))); + ////M /= scale; + //for (size_t i=0; i<(size_t)B.outerSize(); i++) + // for (typename MatrixType::InnerIterator it(B, i); it; ++it) + // it.valueRef() /= scale; + //} + + MatrixSolver OP; + if (mode == 1 || mode == 2) + { + if (!isBempty) + OP.compute(B); + } + else if (mode == 3) + { + if (sigma == 0.0) + { + OP.compute(A); + } + else + { + // Note: We will never enter here because sigma must be 0.0 + // + if (isBempty) + { + MatrixType AminusSigmaB(A); + for (Index i=0; i::saupd(&ido, bmat, &n, whch, &nev, &tol, resid, + &ncv, v, &ldv, iparam, ipntr, workd, workl, + &lworkl, &info); + + if (ido == -1 || ido == 1) + { + Scalar *in = workd + ipntr[0] - 1; + Scalar *out = workd + ipntr[1] - 1; + + if (ido == 1 && mode != 2) + { + Scalar *out2 = workd + ipntr[2] - 1; + if (isBempty || mode == 1) + Matrix::Map(out2, n) = Matrix::Map(in, n); + else + Matrix::Map(out2, n) = B * Matrix::Map(in, n); + + in = workd + ipntr[2] - 1; + } + + if (mode == 1) + { + if (isBempty) + { + // OP = A + // + Matrix::Map(out, n) = A * Matrix::Map(in, n); + } + else + { + // OP = L^{-1}AL^{-T} + // + internal::OP::applyOP(OP, A, n, in, out); + } + } + else if (mode == 2) + { + if (ido == 1) + Matrix::Map(in, n) = A * Matrix::Map(in, n); + + // OP = B^{-1} A + // + Matrix::Map(out, n) = OP.solve(Matrix::Map(in, n)); + } + else if (mode == 3) + { + // OP = (A-\sigmaB)B (\sigma could be 0, and B could be I) + // The B * in is already computed and stored at in if ido == 1 + // + if (ido == 1 || isBempty) + Matrix::Map(out, n) = OP.solve(Matrix::Map(in, n)); + else + Matrix::Map(out, n) = OP.solve(B * Matrix::Map(in, n)); + } + } + else if (ido == 2) + { + Scalar *in = workd + ipntr[0] - 1; + Scalar *out = workd + ipntr[1] - 1; + + if (isBempty || mode == 1) + Matrix::Map(out, n) = Matrix::Map(in, n); + else + 
Matrix::Map(out, n) = B * Matrix::Map(in, n); + } + } while (ido != 99); + + if (info == 1) + m_info = NoConvergence; + else if (info == 3) + m_info = NumericalIssue; + else if (info < 0) + m_info = InvalidInput; + else if (info != 0) + eigen_assert(false && "Unknown ARPACK return value!"); + else + { + // Do we compute eigenvectors or not? + // + int rvec = (options & ComputeEigenvectors) == ComputeEigenvectors; + + // "A" means "All", use "S" to choose specific eigenvalues (not yet supported in ARPACK)) + // + char howmny[2] = "A"; + + // if howmny == "S", specifies the eigenvalues to compute (not implemented in ARPACK) + // + int *select = new int[ncv]; + + // Final eigenvalues + // + m_eivalues.resize(nev, 1); + + internal::arpack_wrapper::seupd(&rvec, howmny, select, m_eivalues.data(), v, &ldv, + &sigma, bmat, &n, whch, &nev, &tol, resid, &ncv, + v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); + + if (info == -14) + m_info = NoConvergence; + else if (info != 0) + m_info = InvalidInput; + else + { + if (rvec) + { + m_eivec.resize(A.rows(), nev); + for (int i=0; i::project(OP, n, nev, m_eivec.data()); + + m_eigenvectorsOk = true; + } + + m_nbrIterations = iparam[2]; + m_nbrConverged = iparam[4]; + + m_info = Success; + } + + delete[] select; + } + + delete[] v; + delete[] iparam; + delete[] ipntr; + delete[] workd; + delete[] workl; + delete[] resid; + + m_isInitialized = true; + + return *this; +} + + +// Single precision +// +extern "C" void ssaupd_(int *ido, char *bmat, int *n, char *which, + int *nev, float *tol, float *resid, int *ncv, + float *v, int *ldv, int *iparam, int *ipntr, + float *workd, float *workl, int *lworkl, + int *info); + +extern "C" void sseupd_(int *rvec, char *All, int *select, float *d, + float *z, int *ldz, float *sigma, + char *bmat, int *n, char *which, int *nev, + float *tol, float *resid, int *ncv, float *v, + int *ldv, int *iparam, int *ipntr, float *workd, + float *workl, int *lworkl, int *ierr); + +// Double precision +// +extern "C" void dsaupd_(int *ido, char *bmat, int *n, char *which, + int *nev, double *tol, double *resid, int *ncv, + double *v, int *ldv, int *iparam, int *ipntr, + double *workd, double *workl, int *lworkl, + int *info); + +extern "C" void dseupd_(int *rvec, char *All, int *select, double *d, + double *z, int *ldz, double *sigma, + char *bmat, int *n, char *which, int *nev, + double *tol, double *resid, int *ncv, double *v, + int *ldv, int *iparam, int *ipntr, double *workd, + double *workl, int *lworkl, int *ierr); + + +namespace internal { + +template struct arpack_wrapper +{ + static inline void saupd(int *ido, char *bmat, int *n, char *which, + int *nev, RealScalar *tol, Scalar *resid, int *ncv, + Scalar *v, int *ldv, int *iparam, int *ipntr, + Scalar *workd, Scalar *workl, int *lworkl, int *info) + { + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL) + } + + static inline void seupd(int *rvec, char *All, int *select, Scalar *d, + Scalar *z, int *ldz, RealScalar *sigma, + char *bmat, int *n, char *which, int *nev, + RealScalar *tol, Scalar *resid, int *ncv, Scalar *v, + int *ldv, int *iparam, int *ipntr, Scalar *workd, + Scalar *workl, int *lworkl, int *ierr) + { + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL) + } +}; + +template <> struct arpack_wrapper +{ + static inline void saupd(int *ido, char *bmat, int *n, char *which, + int *nev, float *tol, float *resid, int *ncv, + float *v, int *ldv, int *iparam, int *ipntr, + float *workd, float *workl, int *lworkl, int *info) + { 
+ ssaupd_(ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info); + } + + static inline void seupd(int *rvec, char *All, int *select, float *d, + float *z, int *ldz, float *sigma, + char *bmat, int *n, char *which, int *nev, + float *tol, float *resid, int *ncv, float *v, + int *ldv, int *iparam, int *ipntr, float *workd, + float *workl, int *lworkl, int *ierr) + { + sseupd_(rvec, All, select, d, z, ldz, sigma, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, + workd, workl, lworkl, ierr); + } +}; + +template <> struct arpack_wrapper +{ + static inline void saupd(int *ido, char *bmat, int *n, char *which, + int *nev, double *tol, double *resid, int *ncv, + double *v, int *ldv, int *iparam, int *ipntr, + double *workd, double *workl, int *lworkl, int *info) + { + dsaupd_(ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info); + } + + static inline void seupd(int *rvec, char *All, int *select, double *d, + double *z, int *ldz, double *sigma, + char *bmat, int *n, char *which, int *nev, + double *tol, double *resid, int *ncv, double *v, + int *ldv, int *iparam, int *ipntr, double *workd, + double *workl, int *lworkl, int *ierr) + { + dseupd_(rvec, All, select, d, v, ldv, sigma, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, + workd, workl, lworkl, ierr); + } +}; + + +template +struct OP +{ + static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out); + static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs); +}; + +template +struct OP +{ + static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out) +{ + // OP = L^{-1} A L^{-T} (B = LL^T) + // + // First solve L^T out = in + // + Matrix::Map(out, n) = OP.matrixU().solve(Matrix::Map(in, n)); + Matrix::Map(out, n) = OP.permutationPinv() * Matrix::Map(out, n); + + // Then compute out = A out + // + Matrix::Map(out, n) = A * Matrix::Map(out, n); + + // Then solve L out = out + // + Matrix::Map(out, n) = OP.permutationP() * Matrix::Map(out, n); + Matrix::Map(out, n) = OP.matrixL().solve(Matrix::Map(out, n)); +} + + static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs) +{ + // Solve L^T out = in + // + Matrix::Map(vecs, n, k) = OP.matrixU().solve(Matrix::Map(vecs, n, k)); + Matrix::Map(vecs, n, k) = OP.permutationPinv() * Matrix::Map(vecs, n, k); +} + +}; + +template +struct OP +{ + static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out) +{ + eigen_assert(false && "Should never be in here..."); +} + + static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs) +{ + eigen_assert(false && "Should never be in here..."); +} + +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ARPACKSELFADJOINTEIGENSOLVER_H + diff --git a/external/unsupported/Eigen/src/EulerAngles/CMakeLists.txt b/external/unsupported/Eigen/src/EulerAngles/CMakeLists.txt new file mode 100644 index 0000000..22088eb --- /dev/null +++ b/external/unsupported/Eigen/src/EulerAngles/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB Eigen_EulerAngles_SRCS "*.h") + +install(FILES + ${Eigen_EulerAngles_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/EulerAngles COMPONENT Devel + ) diff --git a/external/unsupported/Eigen/src/EulerAngles/EulerAngles.h b/external/unsupported/Eigen/src/EulerAngles/EulerAngles.h new file mode 100644 index 0000000..e43cdb7 --- /dev/null +++ 
b/external/unsupported/Eigen/src/EulerAngles/EulerAngles.h @@ -0,0 +1,355 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERANGLESCLASS_H// TODO: Fix previous "EIGEN_EULERANGLES_H" definition? +#define EIGEN_EULERANGLESCLASS_H + +namespace Eigen +{ + /** \class EulerAngles + * + * \ingroup EulerAngles_Module + * + * \brief Represents a rotation in a 3 dimensional space as three Euler angles. + * + * Euler rotation is a set of three rotation of three angles over three fixed axes, defined by the EulerSystem given as a template parameter. + * + * Here is how intrinsic Euler angles works: + * - first, rotate the axes system over the alpha axis in angle alpha + * - then, rotate the axes system over the beta axis(which was rotated in the first stage) in angle beta + * - then, rotate the axes system over the gamma axis(which was rotated in the two stages above) in angle gamma + * + * \note This class support only intrinsic Euler angles for simplicity, + * see EulerSystem how to easily overcome this for extrinsic systems. + * + * ### Rotation representation and conversions ### + * + * It has been proved(see Wikipedia link below) that every rotation can be represented + * by Euler angles, but there is no single representation (e.g. unlike rotation matrices). + * Therefore, you can convert from Eigen rotation and to them + * (including rotation matrices, which is not called "rotations" by Eigen design). + * + * Euler angles usually used for: + * - convenient human representation of rotation, especially in interactive GUI. + * - gimbal systems and robotics + * - efficient encoding(i.e. 3 floats only) of rotation for network protocols. + * + * However, Euler angles are slow comparing to quaternion or matrices, + * because their unnatural math definition, although it's simple for human. + * To overcome this, this class provide easy movement from the math friendly representation + * to the human friendly representation, and vise-versa. + * + * All the user need to do is a safe simple C++ type conversion, + * and this class take care for the math. + * Additionally, some axes related computation is done in compile time. + * + * #### Euler angles ranges in conversions #### + * Rotations representation as EulerAngles are not single (unlike matrices), + * and even have infinite EulerAngles representations.
+ * For example, adding or subtracting 2*PI from either angle of an EulerAngles
+ * yields the same rotation.
+ * This is the general reason for the infinite number of representations,
+ * but it is not the only reason a single representation does not exist.
+ *
+ * When converting a rotation to EulerAngles, this class converts it to specific ranges.
+ * The rules for those ranges are as follows:
+ * - If the rotation we are converting from is an EulerAngles
+ *   (even when it is represented as RotationBase explicitly), angle ranges are __undefined__.
+ * - otherwise, alpha and gamma angles will be in the range [-PI, PI].
+ * As for Beta angle: + * - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2]. + * - otherwise: + * - If the beta axis is positive, the beta angle will be in the range [0, PI] + * - If the beta axis is negative, the beta angle will be in the range [-PI, 0] + * + * \sa EulerAngles(const MatrixBase&) + * \sa EulerAngles(const RotationBase&) + * + * ### Convenient user typedefs ### + * + * Convenient typedefs for EulerAngles exist for float and double scalar, + * in a form of EulerAngles{A}{B}{C}{scalar}, + * e.g. \ref EulerAnglesXYZd, \ref EulerAnglesZYZf. + * + * Only for positive axes{+x,+y,+z} Euler systems are have convenient typedef. + * If you need negative axes{-x,-y,-z}, it is recommended to create you own typedef with + * a word that represent what you need. + * + * ### Example ### + * + * \include EulerAngles.cpp + * Output: \verbinclude EulerAngles.out + * + * ### Additional reading ### + * + * If you're want to get more idea about how Euler system work in Eigen see EulerSystem. + * + * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles + * + * \tparam _Scalar the scalar type, i.e. the type of the angles. + * + * \tparam _System the EulerSystem to use, which represents the axes of rotation. + */ + template + class EulerAngles : public RotationBase, 3> + { + public: + typedef RotationBase, 3> Base; + + /** the scalar type of the angles */ + typedef _Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + /** the EulerSystem to use, which represents the axes of rotation. */ + typedef _System System; + + typedef Matrix Matrix3; /*!< the equivalent rotation matrix type */ + typedef Matrix Vector3; /*!< the equivalent 3 dimension vector type */ + typedef Quaternion QuaternionType; /*!< the equivalent quaternion type */ + typedef AngleAxis AngleAxisType; /*!< the equivalent angle-axis type */ + + /** \returns the axis vector of the first (alpha) rotation */ + static Vector3 AlphaAxisVector() { + const Vector3& u = Vector3::Unit(System::AlphaAxisAbs - 1); + return System::IsAlphaOpposite ? -u : u; + } + + /** \returns the axis vector of the second (beta) rotation */ + static Vector3 BetaAxisVector() { + const Vector3& u = Vector3::Unit(System::BetaAxisAbs - 1); + return System::IsBetaOpposite ? -u : u; + } + + /** \returns the axis vector of the third (gamma) rotation */ + static Vector3 GammaAxisVector() { + const Vector3& u = Vector3::Unit(System::GammaAxisAbs - 1); + return System::IsGammaOpposite ? -u : u; + } + + private: + Vector3 m_angles; + + public: + /** Default constructor without initialization. */ + EulerAngles() {} + /** Constructs and initialize an EulerAngles (\p alpha, \p beta, \p gamma). */ + EulerAngles(const Scalar& alpha, const Scalar& beta, const Scalar& gamma) : + m_angles(alpha, beta, gamma) {} + + // TODO: Test this constructor + /** Constructs and initialize an EulerAngles from the array data {alpha, beta, gamma} */ + explicit EulerAngles(const Scalar* data) : m_angles(data) {} + + /** Constructs and initializes an EulerAngles from either: + * - a 3x3 rotation matrix expression(i.e. pure orthogonal matrix with determinant of +1), + * - a 3D vector expression representing Euler angles. + * + * \note If \p other is a 3x3 rotation matrix, the angles range rules will be as follow:
+ * Alpha and gamma angles will be in the range [-PI, PI].
+ * As for Beta angle:
+ *  - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2].
+ *  - otherwise:
+ *    - If the beta axis is positive, the beta angle will be in the range [0, PI]
+ *    - If the beta axis is negative, the beta angle will be in the range [-PI, 0]
+ */
+ template<typename Derived>
+ explicit EulerAngles(const MatrixBase<Derived>& other) { *this = other; }
+
+ /** Constructs and initializes Euler angles from a rotation \p rot.
+ *
+ * \note If \p rot is an EulerAngles (even when it is represented as RotationBase explicitly),
+ *  angle ranges are __undefined__.
+ *  Otherwise, alpha and gamma angles will be in the range [-PI, PI].
+ * As for Beta angle: + * - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2]. + * - otherwise: + * - If the beta axis is positive, the beta angle will be in the range [0, PI] + * - If the beta axis is negative, the beta angle will be in the range [-PI, 0] + */ + template + EulerAngles(const RotationBase& rot) { System::CalcEulerAngles(*this, rot.toRotationMatrix()); } + + /*EulerAngles(const QuaternionType& q) + { + // TODO: Implement it in a faster way for quaternions + // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/ + // we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below) + // Currently we compute all matrix cells from quaternion. + + // Special case only for ZYX + //Scalar y2 = q.y() * q.y(); + //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z()))); + //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x())); + //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2))); + }*/ + + /** \returns The angle values stored in a vector (alpha, beta, gamma). */ + const Vector3& angles() const { return m_angles; } + /** \returns A read-write reference to the angle values stored in a vector (alpha, beta, gamma). */ + Vector3& angles() { return m_angles; } + + /** \returns The value of the first angle. */ + Scalar alpha() const { return m_angles[0]; } + /** \returns A read-write reference to the angle of the first angle. */ + Scalar& alpha() { return m_angles[0]; } + + /** \returns The value of the second angle. */ + Scalar beta() const { return m_angles[1]; } + /** \returns A read-write reference to the angle of the second angle. */ + Scalar& beta() { return m_angles[1]; } + + /** \returns The value of the third angle. */ + Scalar gamma() const { return m_angles[2]; } + /** \returns A read-write reference to the angle of the third angle. */ + Scalar& gamma() { return m_angles[2]; } + + /** \returns The Euler angles rotation inverse (which is as same as the negative), + * (-alpha, -beta, -gamma). + */ + EulerAngles inverse() const + { + EulerAngles res; + res.m_angles = -m_angles; + return res; + } + + /** \returns The Euler angles rotation negative (which is as same as the inverse), + * (-alpha, -beta, -gamma). + */ + EulerAngles operator -() const + { + return inverse(); + } + + /** Set \c *this from either: + * - a 3x3 rotation matrix expression(i.e. pure orthogonal matrix with determinant of +1), + * - a 3D vector expression representing Euler angles. + * + * See EulerAngles(const MatrixBase&) for more information about + * angles ranges output. + */ + template + EulerAngles& operator=(const MatrixBase& other) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + internal::eulerangles_assign_impl::run(*this, other.derived()); + return *this; + } + + // TODO: Assign and construct from another EulerAngles (with different system) + + /** Set \c *this from a rotation. + * + * See EulerAngles(const RotationBase&) for more information about + * angles ranges output. + */ + template + EulerAngles& operator=(const RotationBase& rot) { + System::CalcEulerAngles(*this, rot.toRotationMatrix()); + return *this; + } + + /** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. 
+ * + * \sa MatrixBase::isApprox() */ + bool isApprox(const EulerAngles& other, + const RealScalar& prec = NumTraits::dummy_precision()) const + { return angles().isApprox(other.angles(), prec); } + + /** \returns an equivalent 3x3 rotation matrix. */ + Matrix3 toRotationMatrix() const + { + // TODO: Calc it faster + return static_cast(*this).toRotationMatrix(); + } + + /** Convert the Euler angles to quaternion. */ + operator QuaternionType() const + { + return + AngleAxisType(alpha(), AlphaAxisVector()) * + AngleAxisType(beta(), BetaAxisVector()) * + AngleAxisType(gamma(), GammaAxisVector()); + } + + friend std::ostream& operator<<(std::ostream& s, const EulerAngles& eulerAngles) + { + s << eulerAngles.angles().transpose(); + return s; + } + + /** \returns \c *this with scalar type casted to \a NewScalarType */ + template + EulerAngles cast() const + { + EulerAngles e; + e.angles() = angles().template cast(); + return e; + } + }; + +#define EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(AXES, SCALAR_TYPE, SCALAR_POSTFIX) \ + /** \ingroup EulerAngles_Module */ \ + typedef EulerAngles EulerAngles##AXES##SCALAR_POSTFIX; + +#define EIGEN_EULER_ANGLES_TYPEDEFS(SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZX, SCALAR_TYPE, SCALAR_POSTFIX) \ + \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXY, SCALAR_TYPE, SCALAR_POSTFIX) \ + \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYZ, SCALAR_TYPE, SCALAR_POSTFIX) + +EIGEN_EULER_ANGLES_TYPEDEFS(float, f) +EIGEN_EULER_ANGLES_TYPEDEFS(double, d) + + namespace internal + { + template + struct traits > + { + typedef _Scalar Scalar; + }; + + // set from a rotation matrix + template + struct eulerangles_assign_impl + { + typedef typename Other::Scalar Scalar; + static void run(EulerAngles& e, const Other& m) + { + System::CalcEulerAngles(e, m); + } + }; + + // set from a vector of Euler angles + template + struct eulerangles_assign_impl + { + typedef typename Other::Scalar Scalar; + static void run(EulerAngles& e, const Other& vec) + { + e.angles() = vec; + } + }; + } +} + +#endif // EIGEN_EULERANGLESCLASS_H diff --git a/external/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/external/unsupported/Eigen/src/EulerAngles/EulerSystem.h new file mode 100644 index 0000000..2a833b0 --- /dev/null +++ b/external/unsupported/Eigen/src/EulerAngles/EulerSystem.h @@ -0,0 +1,305 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERSYSTEM_H +#define EIGEN_EULERSYSTEM_H + +namespace Eigen +{ + // Forward declarations + template + class EulerAngles; + + namespace internal + { + // TODO: Add this trait to the Eigen internal API? 
+ template 0)> + struct Abs + { + enum { value = Num }; + }; + + template + struct Abs + { + enum { value = -Num }; + }; + + template + struct IsValidAxis + { + enum { value = Axis != 0 && Abs::value <= 3 }; + }; + + template + struct eulerangles_assign_impl; + } + + #define EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1] + + /** \brief Representation of a fixed signed rotation axis for EulerSystem. + * + * \ingroup EulerAngles_Module + * + * Values here represent: + * - The axis of the rotation: X, Y or Z. + * - The sign (i.e. direction of the rotation along the axis): positive(+) or negative(-) + * + * Therefore, this could express all the axes {+X,+Y,+Z,-X,-Y,-Z} + * + * For positive axis, use +EULER_{axis}, and for negative axis use -EULER_{axis}. + */ + enum EulerAxis + { + EULER_X = 1, /*!< the X axis */ + EULER_Y = 2, /*!< the Y axis */ + EULER_Z = 3 /*!< the Z axis */ + }; + + /** \class EulerSystem + * + * \ingroup EulerAngles_Module + * + * \brief Represents a fixed Euler rotation system. + * + * This meta-class goal is to represent the Euler system in compilation time, for EulerAngles. + * + * You can use this class to get two things: + * - Build an Euler system, and then pass it as a template parameter to EulerAngles. + * - Query some compile time data about an Euler system. (e.g. Whether it's Tait-Bryan) + * + * Euler rotation is a set of three rotation on fixed axes. (see \ref EulerAngles) + * This meta-class store constantly those signed axes. (see \ref EulerAxis) + * + * ### Types of Euler systems ### + * + * All and only valid 3 dimension Euler rotation over standard + * signed axes{+X,+Y,+Z,-X,-Y,-Z} are supported: + * - all axes X, Y, Z in each valid order (see below what order is valid) + * - rotation over the axis is supported both over the positive and negative directions. + * - both Tait-Bryan and proper/classic Euler angles (i.e. the opposite). + * + * Since EulerSystem support both positive and negative directions, + * you may call this rotation distinction in other names: + * - _right handed_ or _left handed_ + * - _counterclockwise_ or _clockwise_ + * + * Notice all axed combination are valid, and would trigger a static assertion. + * Same unsigned axes can't be neighbors, e.g. {X,X,Y} is invalid. + * This yield two and only two classes: + * - _Tait-Bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z} + * - _proper/classic Euler angles_ - The first and the third unsigned axes is equal, + * and the second is different, e.g. {X,Y,X} + * + * ### Intrinsic vs extrinsic Euler systems ### + * + * Only intrinsic Euler systems are supported for simplicity. + * If you want to use extrinsic Euler systems, + * just use the equal intrinsic opposite order for axes and angles. + * I.e axes (A,B,C) becomes (C,B,A), and angles (a,b,c) becomes (c,b,a). + * + * ### Convenient user typedefs ### + * + * Convenient typedefs for EulerSystem exist (only for positive axes Euler systems), + * in a form of EulerSystem{A}{B}{C}, e.g. \ref EulerSystemXYZ. 
+ * + * ### Additional reading ### + * + * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles + * + * \tparam _AlphaAxis the first fixed EulerAxis + * + * \tparam _BetaAxis the second fixed EulerAxis + * + * \tparam _GammaAxis the third fixed EulerAxis + */ + template + class EulerSystem + { + public: + // It's defined this way and not as enum, because I think + // that enum is not guerantee to support negative numbers + + /** The first rotation axis */ + static const int AlphaAxis = _AlphaAxis; + + /** The second rotation axis */ + static const int BetaAxis = _BetaAxis; + + /** The third rotation axis */ + static const int GammaAxis = _GammaAxis; + + enum + { + AlphaAxisAbs = internal::Abs::value, /*!< the first rotation axis unsigned */ + BetaAxisAbs = internal::Abs::value, /*!< the second rotation axis unsigned */ + GammaAxisAbs = internal::Abs::value, /*!< the third rotation axis unsigned */ + + IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< whether alpha axis is negative */ + IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< whether beta axis is negative */ + IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< whether gamma axis is negative */ + + // Parity is even if alpha axis X is followed by beta axis Y, or Y is followed + // by Z, or Z is followed by X; otherwise it is odd. + IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< whether the Euler system is odd */ + IsEven = IsOdd ? 0 : 1, /*!< whether the Euler system is even */ + + IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 1 : 0 /*!< whether the Euler system is Tait-Bryan */ + }; + + private: + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis::value, + ALPHA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis::value, + BETA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis::value, + GAMMA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)AlphaAxisAbs != (unsigned)BetaAxisAbs, + ALPHA_AXIS_CANT_BE_EQUAL_TO_BETA_AXIS); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)BetaAxisAbs != (unsigned)GammaAxisAbs, + BETA_AXIS_CANT_BE_EQUAL_TO_GAMMA_AXIS); + + static const int + // I, J, K are the pivot indexes permutation for the rotation matrix, that match this Euler system. + // They are used in this class converters. + // They are always different from each other, and their possible values are: 0, 1, or 2. + I_ = AlphaAxisAbs - 1, + J_ = (AlphaAxisAbs - 1 + 1 + IsOdd)%3, + K_ = (AlphaAxisAbs - 1 + 2 - IsOdd)%3 + ; + + // TODO: Get @mat parameter in form that avoids double evaluation. + template + static void CalcEulerAngles_imp(Matrix::Scalar, 3, 1>& res, const MatrixBase& mat, internal::true_type /*isTaitBryan*/) + { + using std::atan2; + using std::sqrt; + + typedef typename Derived::Scalar Scalar; + + const Scalar plusMinus = IsEven? 1 : -1; + const Scalar minusPlus = IsOdd? 
1 : -1; + + const Scalar Rsum = sqrt((mat(I_,I_) * mat(I_,I_) + mat(I_,J_) * mat(I_,J_) + mat(J_,K_) * mat(J_,K_) + mat(K_,K_) * mat(K_,K_))/2); + res[1] = atan2(plusMinus * mat(I_,K_), Rsum); + + // There is a singularity when cos(beta) == 0 + if(Rsum > 4 * NumTraits::epsilon()) {// cos(beta) != 0 + res[0] = atan2(minusPlus * mat(J_, K_), mat(K_, K_)); + res[2] = atan2(minusPlus * mat(I_, J_), mat(I_, I_)); + } + else if(plusMinus * mat(I_, K_) > 0) {// cos(beta) == 0 and sin(beta) == 1 + Scalar spos = mat(J_, I_) + plusMinus * mat(K_, J_); // 2*sin(alpha + plusMinus * gamma + Scalar cpos = mat(J_, J_) + minusPlus * mat(K_, I_); // 2*cos(alpha + plusMinus * gamma) + Scalar alphaPlusMinusGamma = atan2(spos, cpos); + res[0] = alphaPlusMinusGamma; + res[2] = 0; + } + else {// cos(beta) == 0 and sin(beta) == -1 + Scalar sneg = plusMinus * (mat(K_, J_) + minusPlus * mat(J_, I_)); // 2*sin(alpha + minusPlus*gamma) + Scalar cneg = mat(J_, J_) + plusMinus * mat(K_, I_); // 2*cos(alpha + minusPlus*gamma) + Scalar alphaMinusPlusBeta = atan2(sneg, cneg); + res[0] = alphaMinusPlusBeta; + res[2] = 0; + } + } + + template + static void CalcEulerAngles_imp(Matrix::Scalar,3,1>& res, + const MatrixBase& mat, internal::false_type /*isTaitBryan*/) + { + using std::atan2; + using std::sqrt; + + typedef typename Derived::Scalar Scalar; + + const Scalar plusMinus = IsEven? 1 : -1; + const Scalar minusPlus = IsOdd? 1 : -1; + + const Scalar Rsum = sqrt((mat(I_, J_) * mat(I_, J_) + mat(I_, K_) * mat(I_, K_) + mat(J_, I_) * mat(J_, I_) + mat(K_, I_) * mat(K_, I_)) / 2); + + res[1] = atan2(Rsum, mat(I_, I_)); + + // There is a singularity when sin(beta) == 0 + if(Rsum > 4 * NumTraits::epsilon()) {// sin(beta) != 0 + res[0] = atan2(mat(J_, I_), minusPlus * mat(K_, I_)); + res[2] = atan2(mat(I_, J_), plusMinus * mat(I_, K_)); + } + else if(mat(I_, I_) > 0) {// sin(beta) == 0 and cos(beta) == 1 + Scalar spos = plusMinus * mat(K_, J_) + minusPlus * mat(J_, K_); // 2*sin(alpha + gamma) + Scalar cpos = mat(J_, J_) + mat(K_, K_); // 2*cos(alpha + gamma) + res[0] = atan2(spos, cpos); + res[2] = 0; + } + else {// sin(beta) == 0 and cos(beta) == -1 + Scalar sneg = plusMinus * mat(K_, J_) + plusMinus * mat(J_, K_); // 2*sin(alpha - gamma) + Scalar cneg = mat(J_, J_) - mat(K_, K_); // 2*cos(alpha - gamma) + res[0] = atan2(sneg, cneg); + res[2] = 0; + } + } + + template + static void CalcEulerAngles( + EulerAngles& res, + const typename EulerAngles::Matrix3& mat) + { + CalcEulerAngles_imp( + res.angles(), mat, + typename internal::conditional::type()); + + if (IsAlphaOpposite) + res.alpha() = -res.alpha(); + + if (IsBetaOpposite) + res.beta() = -res.beta(); + + if (IsGammaOpposite) + res.gamma() = -res.gamma(); + } + + template + friend class Eigen::EulerAngles; + + template + friend struct internal::eulerangles_assign_impl; + }; + +#define EIGEN_EULER_SYSTEM_TYPEDEF(A, B, C) \ + /** \ingroup EulerAngles_Module */ \ + typedef EulerSystem EulerSystem##A##B##C; + + EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,X) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,X) + + EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,X) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Y) + + EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,X) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,Z) +} + +#endif // EIGEN_EULERSYSTEM_H diff --git a/external/unsupported/Eigen/src/FFT/ei_fftw_impl.h 
b/external/unsupported/Eigen/src/FFT/ei_fftw_impl.h new file mode 100644 index 0000000..1c2cd24 --- /dev/null +++ b/external/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -0,0 +1,261 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +namespace Eigen { + +namespace internal { + + // FFTW uses non-const arguments + // so we must use ugly const_cast calls for all the args it uses + // + // This should be safe as long as + // 1. we use FFTW_ESTIMATE for all our planning + // see the FFTW docs section 4.3.2 "Planner Flags" + // 2. fftw_complex is compatible with std::complex + // This assumes std::complex layout is array of size 2 with real,imag + template + inline + T * fftw_cast(const T* p) + { + return const_cast( p); + } + + inline + fftw_complex * fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + inline + fftwf_complex * fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + inline + fftwl_complex * fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + template + struct fftw_plan {}; + + template <> + struct fftw_plan + { + typedef float scalar_type; + typedef fftwf_complex complex_type; + fftwf_plan m_plan; + fftw_plan() :m_plan(NULL) {} + ~fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + + inline + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft( m_plan, src,dst); + } + inline + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft( m_plan, src,dst); + } + inline + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft_r2c( m_plan,src,dst); + } + inline + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft_c2r( m_plan, src,dst); + } + + inline + void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft( m_plan, src,dst); + } + inline + void inv2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwf_execute_dft( m_plan, src,dst); + } + + }; + template <> + struct fftw_plan + { + typedef double scalar_type; + typedef fftw_complex complex_type; + ::fftw_plan m_plan; + fftw_plan() :m_plan(NULL) {} + ~fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + + inline + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft( m_plan, src,dst); + } + inline + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = 
fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft( m_plan, src,dst); + } + inline + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft_r2c( m_plan,src,dst); + } + inline + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft_c2r( m_plan, src,dst); + } + inline + void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftw_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft( m_plan, src,dst); + } + inline + void inv2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftw_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftw_execute_dft( m_plan, src,dst); + } + }; + template <> + struct fftw_plan + { + typedef long double scalar_type; + typedef fftwl_complex complex_type; + fftwl_plan m_plan; + fftw_plan() :m_plan(NULL) {} + ~fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + + inline + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft( m_plan, src,dst); + } + inline + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft( m_plan, src,dst); + } + inline + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft_r2c( m_plan,src,dst); + } + inline + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft_c2r( m_plan, src,dst); + } + inline + void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft( m_plan, src,dst); + } + inline + void inv2( complex_type * dst,complex_type * src,int n0,int n1) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); + fftwl_execute_dft( m_plan, src,dst); + } + }; + + template + struct fftw_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + + inline + void clear() + { + m_plans.clear(); + } + + // complex-to-complex forward FFT + inline + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(fftw_cast(dst), fftw_cast(src),nfft ); + } + + // real-to-complex forward FFT + inline + void fwd( Complex * dst,const Scalar * src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(fftw_cast(dst), fftw_cast(src) ,nfft); + } + + // 2-d complex-to-complex + inline + void fwd2(Complex * dst, const Complex * src, int n0,int n1) + { + get_plan(n0,n1,false,dst,src).fwd2(fftw_cast(dst), fftw_cast(src) ,n0,n1); + } + + // inverse complex-to-complex + inline + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true,dst,src).inv(fftw_cast(dst), fftw_cast(src),nfft ); + } + + // half-complex to scalar + inline + void inv( Scalar * dst,const Complex * src,int 
nfft) + { + get_plan(nfft,true,dst,src).inv(fftw_cast(dst), fftw_cast(src),nfft ); + } + + // 2-d complex-to-complex + inline + void inv2(Complex * dst, const Complex * src, int n0,int n1) + { + get_plan(n0,n1,true,dst,src).inv2(fftw_cast(dst), fftw_cast(src) ,n0,n1); + } + + + protected: + typedef fftw_plan PlanData; + + typedef Eigen::numext::int64_t int64_t; + + typedef std::map PlanMap; + + PlanMap m_plans; + + inline + PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int64_t key = ( (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1; + return m_plans[key]; + } + + inline + PlanData & get_plan(int n0,int n1,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int64_t key = ( ( (((int64_t)n0) << 30)|(n1<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1 ) + 1; + return m_plans[key]; + } + }; + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/external/unsupported/Eigen/src/FFT/ei_kissfft_impl.h new file mode 100644 index 0000000..430953a --- /dev/null +++ b/external/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -0,0 +1,449 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +namespace Eigen { + +namespace internal { + + // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft + // Copyright 2003-2009 Mark Borgerding + +template +struct kiss_cpx_fft +{ + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + std::vector m_scratchBuf; + bool m_inverse; + + inline void make_twiddles(int nfft, bool inverse) + { + using numext::sin; + using numext::cos; + m_inverse = inverse; + m_twiddles.resize(nfft); + double phinc = 0.25 * double(EIGEN_PI) / nfft; + Scalar flip = inverse ? 
Scalar(1) : Scalar(-1); + m_twiddles[0] = Complex(Scalar(1), Scalar(0)); + if ((nfft&1)==0) + m_twiddles[nfft/2] = Complex(Scalar(-1), Scalar(0)); + int i=1; + for (;i*8n) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic + }while(n>1); + } + + template + inline + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + inline + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - Scalar(.5)*scratch[3].real() , Fout->imag() - Scalar(.5)*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + inline + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u(m_twiddles.size()); + Complex * scratchbuf = &m_scratchBuf[0]; + + for ( u=0; u(fstride) * k; + if (twidx>=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } +}; + +template +struct kissfft_impl +{ + typedef _Scalar Scalar; + typedef std::complex Complex; + + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } + + inline + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } + + inline + void fwd2( Complex * dst,const Complex *src,int n0,int n1) + { + EIGEN_UNUSED_VARIABLE(dst); + EIGEN_UNUSED_VARIABLE(src); + EIGEN_UNUSED_VARIABLE(n0); + EIGEN_UNUSED_VARIABLE(n1); + } + + inline + void inv2( Complex * dst,const Complex *src,int n0,int n1) + { + EIGEN_UNUSED_VARIABLE(dst); + EIGEN_UNUSED_VARIABLE(src); + EIGEN_UNUSED_VARIABLE(n0); + EIGEN_UNUSED_VARIABLE(n1); + } + + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + inline + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&3 ) { + // use generic mode for odd + m_tmpBuf1.resize(nfft); + get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1); + std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst ); + }else{ + int ncfft = nfft>>1; + int ncfft2 = 
nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc(dst[0].real() + dst[0].imag()); + Complex nyquist(dst[0].real() - dst[0].imag()); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + // inverse complex-to-complex + inline + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true).work(0, dst, src, 1,1); + } + + // half-complex to scalar + inline + void inv( Scalar * dst,const Complex * src,int nfft) + { + if (nfft&3) { + m_tmpBuf1.resize(nfft); + m_tmpBuf2.resize(nfft); + std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() ); + for (int k=1;k<(nfft>>1)+1;++k) + m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]); + inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft); + for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_tmpBuf1.resize(ncfft); + m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_tmpBuf1[k] = fek + fok; + m_tmpBuf1[ncfft-k] = conj(fek - fok); + } + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf1[0], 1,1); + } + } + + protected: + typedef kiss_cpx_fft PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_tmpBuf1; + std::vector m_tmpBuf2; + + inline + int PlanKey(int nfft, bool isinverse) const { return (nfft<<1) | int(isinverse); } + + inline + PlanData & get_plan(int nfft, bool inverse) + { + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); + } + return pd; + } + + inline + Complex * real_twiddles(int ncfft2) + { + using std::acos; + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * (Scalar(k) / ncfft + Scalar(.5)) ) ); + } + return &twidref[0]; + } +}; + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/external/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h new file mode 100644 index 0000000..e7d70f3 --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h @@ -0,0 +1,187 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud + +/* NOTE The functions of this file have been adapted from the GMM++ library */ + +//======================================================================== +// +// Copyright (C) 2002-2007 Yves Renard +// +// This file is a part of GETFEM++ +// +// Getfem++ is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; version 2.1 of the License. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// You should have received a copy of the GNU Lesser General Public +// License along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +// USA. +// +//======================================================================== + +#include "../../../../Eigen/src/Core/util/NonMPL2.h" + +#ifndef EIGEN_CONSTRAINEDCG_H +#define EIGEN_CONSTRAINEDCG_H + +#include "../../../../Eigen/Core" + +namespace Eigen { + +namespace internal { + +/** \ingroup IterativeLinearSolvers_Module + * Compute the pseudo inverse of the non-square matrix C such that + * \f$ CINV = (C * C^T)^{-1} * C \f$ based on a conjugate gradient method. + * + * This function is internally used by constrained_cg. + */ +template +void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV) +{ + // optimisable : copie de la ligne, precalcul de C * trans(C). + typedef typename CMatrix::Scalar Scalar; + typedef typename CMatrix::Index Index; + // FIXME use sparse vectors ? + typedef Matrix TmpVec; + + Index rows = C.rows(), cols = C.cols(); + + TmpVec d(rows), e(rows), l(cols), p(rows), q(rows), r(rows); + Scalar rho, rho_1, alpha; + d.setZero(); + + typedef Triplet T; + std::vector tripletList; + + for (Index i = 0; i < rows; ++i) + { + d[i] = 1.0; + rho = 1.0; + e.setZero(); + r = d; + p = d; + + while (rho >= 1e-38) + { /* conjugate gradient to compute e */ + /* which is the i-th row of inv(C * trans(C)) */ + l = C.transpose() * p; + q = C * l; + alpha = rho / p.dot(q); + e += alpha * p; + r += -alpha * q; + rho_1 = rho; + rho = r.dot(r); + p = (rho/rho_1) * p + r; + } + + l = C.transpose() * e; // l is the i-th row of CINV + // FIXME add a generic "prune/filter" expression for both dense and sparse object to sparse + for (Index j=0; j +void constrained_cg(const TMatrix& A, const CMatrix& C, VectorX& x, + const VectorB& b, const VectorF& f, IterationController &iter) +{ + using std::sqrt; + typedef typename TMatrix::Scalar Scalar; + typedef typename TMatrix::Index Index; + typedef Matrix TmpVec; + + Scalar rho = 1.0, rho_1, lambda, gamma; + Index xSize = x.size(); + TmpVec p(xSize), q(xSize), q2(xSize), + r(xSize), old_z(xSize), z(xSize), + memox(xSize); + std::vector satured(C.rows()); + p.setZero(); + iter.setRhsNorm(sqrt(b.dot(b))); // gael vect_sp(PS, b, b) + if (iter.rhsNorm() == 0.0) iter.setRhsNorm(1.0); + + SparseMatrix CINV(C.rows(), C.cols()); + pseudo_inverse(C, CINV); + + while(true) + { + // computation of residual + old_z = z; + memox = x; + r = b; + r += A * -x; + z = r; + bool transition = false; + for (Index i = 0; i < C.rows(); ++i) + { + Scalar al = C.row(i).dot(x) - f.coeff(i); + if (al >= -1.0E-15) + { + if (!satured[i]) + { + satured[i] = true; + transition = true; + } + Scalar bb = CINV.row(i).dot(z); + if (bb > 0.0) + // FIXME: we should allow that: z += -bb * C.row(i); + for (typename CMatrix::InnerIterator it(C,i); it; ++it) + z.coeffRef(it.index()) -= bb*it.value(); + } + else + satured[i] = false; + } + + // descent direction + rho_1 = rho; + rho = r.dot(z); + + if (iter.finished(rho)) break; + if (transition || iter.first()) gamma = 0.0; + else gamma = (std::max)(0.0, (rho - old_z.dot(z)) / rho_1); + p = z + gamma*p; + + ++iter; + // one dimensionnal optimization + q = A * p; + 
lambda = rho / q.dot(p); + for (Index i = 0; i < C.rows(); ++i) + { + if (!satured[i]) + { + Scalar bb = C.row(i).dot(p) - f[i]; + if (bb > 0.0) + lambda = (std::min)(lambda, (f.coeff(i)-C.row(i).dot(x)) / bb); + } + } + x += lambda * p; + memox -= x; + } +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CONSTRAINEDCG_H diff --git a/external/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/external/unsupported/Eigen/src/IterativeSolvers/DGMRES.h new file mode 100644 index 0000000..5ae011b --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -0,0 +1,511 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DGMRES_H +#define EIGEN_DGMRES_H + +#include "../../../../Eigen/Eigenvalues" + +namespace Eigen { + +template< typename _MatrixType, + typename _Preconditioner = DiagonalPreconditioner > +class DGMRES; + +namespace internal { + +template< typename _MatrixType, typename _Preconditioner> +struct traits > +{ + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; +}; + +/** \brief Computes a permutation vector to have a sorted sequence + * \param vec The vector to reorder. + * \param perm gives the sorted sequence on output. Must be initialized with 0..n-1 + * \param ncut Put the ncut smallest elements at the end of the vector + * WARNING This is an expensive sort, so should be used only + * for small size vectors + * TODO Use modified QuickSplit or std::nth_element to get the smallest values + */ +template +void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut) +{ + eigen_assert(vec.size() == perm.size()); + bool flag; + for (Index k = 0; k < ncut; k++) + { + flag = false; + for (Index j = 0; j < vec.size()-1; j++) + { + if ( vec(perm(j)) < vec(perm(j+1)) ) + { + std::swap(perm(j),perm(j+1)); + flag = true; + } + if (!flag) break; // The vector is in sorted order + } + } +} + +} +/** + * \ingroup IterativeLinearSolvers_Module + * \brief A Restarted GMRES with deflation. + * This class implements a modification of the GMRES solver for + * sparse linear systems. The basis is built with modified + * Gram-Schmidt. At each restart, a few approximated eigenvectors + * corresponding to the smallest eigenvalues are used to build a + * preconditioner for the next cycle. This preconditioner + * for deflation can be combined with any other preconditioner, + * the IncompleteLUT for instance. The preconditioner is applied + * at right of the matrix and the combination is multiplicative. + * + * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. + * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * Typical usage : + * \code + * SparseMatrix A; + * VectorXd x, b; + * //Fill A and b ... + * DGMRES > solver; + * solver.set_restart(30); // Set restarting value + * solver.setEigenv(1); // Set the number of eigenvalues to deflate + * solver.compute(A); + * x = solver.solve(b); + * \endcode + * + * DGMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * + * References : + * [1] D. NUENTSA WAKAM and F. 
PACULL, Memory Efficient Hybrid + * Algebraic Solvers for Linear Systems Arising from Compressible + * Flows, Computers and Fluids, In Press, + * https://doi.org/10.1016/j.compfluid.2012.03.023 + * [2] K. Burrage and J. Erhel, On the performance of various + * adaptive preconditioned GMRES strategies, 5(1998), 101-121. + * [3] J. Erhel, K. Burrage and B. Pohl, Restarted GMRES + * preconditioned by deflation,J. Computational and Applied + * Mathematics, 69(1996), 303-318. + + * + */ +template< typename _MatrixType, typename _Preconditioner> +class DGMRES : public IterativeSolverBase > +{ + typedef IterativeSolverBase Base; + using Base::matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_tolerance; + public: + using Base::_solve_impl; + using Base::_solve_with_guess_impl; + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + typedef Matrix DenseMatrix; + typedef Matrix DenseRealMatrix; + typedef Matrix DenseVector; + typedef Matrix DenseRealVector; + typedef Matrix, Dynamic, 1> ComplexVector; + + + /** Default constructor. */ + DGMRES() : Base(),m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + * + * This constructor is a shortcut for the default constructor followed + * by a call to compute(). + * + * \warning this class stores a reference to the matrix A as well as some + * precomputed values that depend on it. Therefore, if \a A is changed + * this class becomes invalid. Call compute() to update it with the new + * matrix A, or modify a copy of A. 
+ */ + template + explicit DGMRES(const EigenBase& A) : Base(A.derived()), m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} + + ~DGMRES() {} + + /** \internal */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + EIGEN_STATIC_ASSERT(Rhs::ColsAtCompileTime==1 || Dest::ColsAtCompileTime==1, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + dgmres(matrix(), b, x, Base::m_preconditioner); + } + + /** + * Get the restart value + */ + Index restart() { return m_restart; } + + /** + * Set the restart value (default is 30) + */ + void set_restart(const Index restart) { m_restart=restart; } + + /** + * Set the number of eigenvalues to deflate at each restart + */ + void setEigenv(const Index neig) + { + m_neig = neig; + if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates + } + + /** + * Get the size of the deflation subspace size + */ + Index deflSize() {return m_r; } + + /** + * Set the maximum size of the deflation subspace + */ + void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; } + + protected: + // DGMRES algorithm + template + void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const; + // Perform one cycle of GMRES + template + Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const; + // Compute data to use for deflation + Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const; + // Apply deflation to a vector + template + Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const; + ComplexVector schurValues(const ComplexSchur& schurofH) const; + ComplexVector schurValues(const RealSchur& schurofH) const; + // Init data for deflation + void dgmresInitDeflation(Index& rows) const; + mutable DenseMatrix m_V; // Krylov basis vectors + mutable DenseMatrix m_H; // Hessenberg matrix + mutable DenseMatrix m_Hes; // Initial hessenberg matrix without Givens rotations applied + mutable Index m_restart; // Maximum size of the Krylov subspace + mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace + mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) + mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */ + mutable PartialPivLU m_luT; // LU factorization of m_T + mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart + mutable Index m_r; // Current number of deflated eigenvalues, size of m_U + mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate + mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A + mutable bool m_isDeflAllocated; + mutable bool m_isDeflInitialized; + + //Adaptive strategy + mutable RealScalar m_smv; // Smaller multiple of the remaining number of steps allowed + mutable bool m_force; // Force the use of deflation at each restart + +}; +/** + * \brief Perform several cycles of restarted GMRES with modified Gram Schmidt, + * + * A right preconditioner is used combined with deflation. 
+ * + */ +template< typename _MatrixType, typename _Preconditioner> +template +void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, + const Preconditioner& precond) const +{ + const RealScalar considerAsZero = (std::numeric_limits::min)(); + + RealScalar normRhs = rhs.norm(); + if(normRhs <= considerAsZero) + { + x.setZero(); + m_error = 0; + return; + } + + //Initialization + m_isDeflInitialized = false; + Index n = mat.rows(); + DenseVector r0(n); + Index nbIts = 0; + m_H.resize(m_restart+1, m_restart); + m_Hes.resize(m_restart, m_restart); + m_V.resize(n,m_restart+1); + //Initial residual vector and initial norm + if(x.squaredNorm()==0) + x = precond.solve(rhs); + r0 = rhs - mat * x; + RealScalar beta = r0.norm(); + + m_error = beta/normRhs; + if(m_error < m_tolerance) + m_info = Success; + else + m_info = NoConvergence; + + // Iterative process + while (nbIts < m_iterations && m_info == NoConvergence) + { + dgmresCycle(mat, precond, x, r0, beta, normRhs, nbIts); + + // Compute the new residual vector for the restart + if (nbIts < m_iterations && m_info == NoConvergence) { + r0 = rhs - mat * x; + beta = r0.norm(); + } + } +} + +/** + * \brief Perform one restart cycle of DGMRES + * \param mat The coefficient matrix + * \param precond The preconditioner + * \param x the new approximated solution + * \param r0 The initial residual vector + * \param beta The norm of the residual computed so far + * \param normRhs The norm of the right hand side vector + * \param nbIts The number of iterations + */ +template< typename _MatrixType, typename _Preconditioner> +template +Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const +{ + //Initialization + DenseVector g(m_restart+1); // Right hand side of the least square problem + g.setZero(); + g(0) = Scalar(beta); + m_V.col(0) = r0/beta; + m_info = NoConvergence; + std::vector >gr(m_restart); // Givens rotations + Index it = 0; // Number of inner iterations + Index n = mat.rows(); + DenseVector tv1(n), tv2(n); //Temporary vectors + while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations) + { + // Apply preconditioner(s) at right + if (m_isDeflInitialized ) + { + dgmresApplyDeflation(m_V.col(it), tv1); // Deflation + tv2 = precond.solve(tv1); + } + else + { + tv2 = precond.solve(m_V.col(it)); // User's selected preconditioner + } + tv1 = mat * tv2; + + // Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt + Scalar coef; + for (Index i = 0; i <= it; ++i) + { + coef = tv1.dot(m_V.col(i)); + tv1 = tv1 - coef * m_V.col(i); + m_H(i,it) = coef; + m_Hes(i,it) = coef; + } + // Normalize the vector + coef = tv1.norm(); + m_V.col(it+1) = tv1/coef; + m_H(it+1, it) = coef; +// m_Hes(it+1,it) = coef; + + // FIXME Check for happy breakdown + + // Update Hessenberg matrix with Givens rotations + for (Index i = 1; i <= it; ++i) + { + m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint()); + } + // Compute the new plane rotation + gr[it].makeGivens(m_H(it, it), m_H(it+1,it)); + // Apply the new rotation + m_H.col(it).applyOnTheLeft(it,it+1,gr[it].adjoint()); + g.applyOnTheLeft(it,it+1, gr[it].adjoint()); + + beta = std::abs(g(it+1)); + m_error = beta/normRhs; + // std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; + it++; nbIts++; + + if (m_error < m_tolerance) + { + // The method has converged + m_info = Success; 
+ break; + } + } + + // Compute the new coefficients by solving the least square problem +// it++; + //FIXME Check first if the matrix is singular ... zero diagonal + DenseVector nrs(m_restart); + nrs = m_H.topLeftCorner(it,it).template triangularView().solve(g.head(it)); + + // Form the new solution + if (m_isDeflInitialized) + { + tv1 = m_V.leftCols(it) * nrs; + dgmresApplyDeflation(tv1, tv2); + x = x + precond.solve(tv2); + } + else + x = x + precond.solve(m_V.leftCols(it) * nrs); + + // Go for a new cycle and compute data for deflation + if(nbIts < m_iterations && m_info == NoConvergence && m_neig > 0 && (m_r+m_neig) < m_maxNeig) + dgmresComputeDeflationData(mat, precond, it, m_neig); + return 0; + +} + + +template< typename _MatrixType, typename _Preconditioner> +void DGMRES<_MatrixType, _Preconditioner>::dgmresInitDeflation(Index& rows) const +{ + m_U.resize(rows, m_maxNeig); + m_MU.resize(rows, m_maxNeig); + m_T.resize(m_maxNeig, m_maxNeig); + m_lambdaN = 0.0; + m_isDeflAllocated = true; +} + +template< typename _MatrixType, typename _Preconditioner> +inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const ComplexSchur& schurofH) const +{ + return schurofH.matrixT().diagonal(); +} + +template< typename _MatrixType, typename _Preconditioner> +inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const RealSchur& schurofH) const +{ + const DenseMatrix& T = schurofH.matrixT(); + Index it = T.rows(); + ComplexVector eig(it); + Index j = 0; + while (j < it-1) + { + if (T(j+1,j) ==Scalar(0)) + { + eig(j) = std::complex(T(j,j),RealScalar(0)); + j++; + } + else + { + eig(j) = std::complex(T(j,j),T(j+1,j)); + eig(j+1) = std::complex(T(j,j+1),T(j+1,j+1)); + j++; + } + } + if (j < it-1) eig(j) = std::complex(T(j,j),RealScalar(0)); + return eig; +} + +template< typename _MatrixType, typename _Preconditioner> +Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const +{ + // First, find the Schur form of the Hessenberg matrix H + typename internal::conditional::IsComplex, ComplexSchur, RealSchur >::type schurofH; + bool computeU = true; + DenseMatrix matrixQ(it,it); + matrixQ.setIdentity(); + schurofH.computeFromHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU); + + ComplexVector eig(it); + Matrixperm(it); + eig = this->schurValues(schurofH); + + // Reorder the absolute values of Schur values + DenseRealVector modulEig(it); + for (Index j=0; j(it-1)); + internal::sortWithPermutation(modulEig, perm, neig); + + if (!m_lambdaN) + { + m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN); + } + //Count the real number of extracted eigenvalues (with complex conjugates) + Index nbrEig = 0; + while (nbrEig < neig) + { + if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++; + else nbrEig += 2; + } + // Extract the Schur vectors corresponding to the smallest Ritz values + DenseMatrix Sr(it, nbrEig); + Sr.setZero(); + for (Index j = 0; j < nbrEig; j++) + { + Sr.col(j) = schurofH.matrixU().col(perm(it-j-1)); + } + + // Form the Schur vectors of the initial matrix using the Krylov basis + DenseMatrix X; + X = m_V.leftCols(it) * Sr; + if (m_r) + { + // Orthogonalize X against m_U using modified Gram-Schmidt + for (Index j = 0; j < nbrEig; j++) + for (Index k =0; k < m_r; k++) + X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k); + } + + // 
Compute m_MX = A * M^-1 * X + Index m = m_V.rows(); + if (!m_isDeflAllocated) + dgmresInitDeflation(m); + DenseMatrix MX(m, nbrEig); + DenseVector tv1(m); + for (Index j = 0; j < nbrEig; j++) + { + tv1 = mat * X.col(j); + MX.col(j) = precond.solve(tv1); + } + + //Update m_T = [U'MU U'MX; X'MU X'MX] + m_T.block(m_r, m_r, nbrEig, nbrEig) = X.transpose() * MX; + if(m_r) + { + m_T.block(0, m_r, m_r, nbrEig) = m_U.leftCols(m_r).transpose() * MX; + m_T.block(m_r, 0, nbrEig, m_r) = X.transpose() * m_MU.leftCols(m_r); + } + + // Save X into m_U and m_MX in m_MU + for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j); + for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j); + // Increase the size of the invariant subspace + m_r += nbrEig; + + // Factorize m_T into m_luT + m_luT.compute(m_T.topLeftCorner(m_r, m_r)); + + //FIXME CHeck if the factorization was correctly done (nonsingular matrix) + m_isDeflInitialized = true; + return 0; +} +template +template +Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const +{ + DenseVector x1 = m_U.leftCols(m_r).transpose() * x; + y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1); + return 0; +} + +} // end namespace Eigen +#endif diff --git a/external/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/external/unsupported/Eigen/src/IterativeSolvers/GMRES.h new file mode 100644 index 0000000..ff91209 --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -0,0 +1,335 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2012, 2014 Kolja Brix +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GMRES_H +#define EIGEN_GMRES_H + +namespace Eigen { + +namespace internal { + +/** +* Generalized Minimal Residual Algorithm based on the +* Arnoldi algorithm implemented with Householder reflections. +* +* Parameters: +* \param mat matrix of linear system of equations +* \param rhs right hand side vector of linear system of equations +* \param x on input: initial guess, on output: solution +* \param precond preconditioner used +* \param iters on input: maximum number of iterations to perform +* on output: number of iterations performed +* \param restart number of iterations for a restart +* \param tol_error on input: relative residual tolerance +* on output: residuum achieved +* +* \sa IterativeMethods::bicgstab() +* +* +* For references, please see: +* +* Saad, Y. and Schultz, M. H. +* GMRES: A Generalized Minimal Residual Algorithm for Solving Nonsymmetric Linear Systems. +* SIAM J.Sci.Stat.Comp. 7, 1986, pp. 856 - 869. +* +* Saad, Y. +* Iterative Methods for Sparse Linear Systems. +* Society for Industrial and Applied Mathematics, Philadelphia, 2003. +* +* Walker, H. F. +* Implementations of the GMRES method. +* Comput.Phys.Comm. 53, 1989, pp. 311 - 320. +* +* Walker, H. F. +* Implementation of the GMRES Method using Householder Transformations. +* SIAM J.Sci.Stat.Comp. 9, 1988, pp. 152 - 163. 
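+*
+* Note: this implementation builds the Arnoldi basis with Householder reflections
+* (cf. the two Walker references above) rather than modified Gram-Schmidt, which
+* is generally more robust against loss of orthogonality in the Krylov basis.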
+* +*/ +template +bool gmres(const MatrixType & mat, const Rhs & rhs, Dest & x, const Preconditioner & precond, + Index &iters, const Index &restart, typename Dest::RealScalar & tol_error) { + + using std::sqrt; + using std::abs; + + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + typedef Matrix < Scalar, Dynamic, 1 > VectorType; + typedef Matrix < Scalar, Dynamic, Dynamic, ColMajor> FMatrixType; + + const RealScalar considerAsZero = (std::numeric_limits::min)(); + + if(rhs.norm() <= considerAsZero) + { + x.setZero(); + tol_error = 0; + return true; + } + + RealScalar tol = tol_error; + const Index maxIters = iters; + iters = 0; + + const Index m = mat.rows(); + + // residual and preconditioned residual + VectorType p0 = rhs - mat*x; + VectorType r0 = precond.solve(p0); + + const RealScalar r0Norm = r0.norm(); + + // is initial guess already good enough? + if(r0Norm == 0) + { + tol_error = 0; + return true; + } + + // storage for Hessenberg matrix and Householder data + FMatrixType H = FMatrixType::Zero(m, restart + 1); + VectorType w = VectorType::Zero(restart + 1); + VectorType tau = VectorType::Zero(restart + 1); + + // storage for Jacobi rotations + std::vector < JacobiRotation < Scalar > > G(restart); + + // storage for temporaries + VectorType t(m), v(m), workspace(m), x_new(m); + + // generate first Householder vector + Ref H0_tail = H.col(0).tail(m - 1); + RealScalar beta; + r0.makeHouseholder(H0_tail, tau.coeffRef(0), beta); + w(0) = Scalar(beta); + + for (Index k = 1; k <= restart; ++k) + { + ++iters; + + v = VectorType::Unit(m, k - 1); + + // apply Householder reflections H_{1} ... H_{k-1} to v + // TODO: use a HouseholderSequence + for (Index i = k - 1; i >= 0; --i) { + v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); + } + + // apply matrix M to v: v = mat * v; + t.noalias() = mat * v; + v = precond.solve(t); + + // apply Householder reflections H_{k-1} ... 
H_{1} to v + // TODO: use a HouseholderSequence + for (Index i = 0; i < k; ++i) { + v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); + } + + if (v.tail(m - k).norm() != 0.0) + { + if (k <= restart) + { + // generate new Householder vector + Ref Hk_tail = H.col(k).tail(m - k - 1); + v.tail(m - k).makeHouseholder(Hk_tail, tau.coeffRef(k), beta); + + // apply Householder reflection H_{k} to v + v.tail(m - k).applyHouseholderOnTheLeft(Hk_tail, tau.coeffRef(k), workspace.data()); + } + } + + if (k > 1) + { + for (Index i = 0; i < k - 1; ++i) + { + // apply old Givens rotations to v + v.applyOnTheLeft(i, i + 1, G[i].adjoint()); + } + } + + if (k y = w.head(k); + H.topLeftCorner(k, k).template triangularView ().solveInPlace(y); + + // use Horner-like scheme to calculate solution vector + x_new.setZero(); + for (Index i = k - 1; i >= 0; --i) + { + x_new(i) += y(i); + // apply Householder reflection H_{i} to x_new + x_new.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); + } + + x += x_new; + + if(stop) + { + return true; + } + else + { + k=0; + + // reset data for restart + p0.noalias() = rhs - mat*x; + r0 = precond.solve(p0); + + // clear Hessenberg matrix and Householder data + H.setZero(); + w.setZero(); + tau.setZero(); + + // generate first Householder vector + r0.makeHouseholder(H0_tail, tau.coeffRef(0), beta); + w(0) = Scalar(beta); + } + } + } + + return false; + +} + +} + +template< typename _MatrixType, + typename _Preconditioner = DiagonalPreconditioner > +class GMRES; + +namespace internal { + +template< typename _MatrixType, typename _Preconditioner> +struct traits > +{ + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; +}; + +} + +/** \ingroup IterativeLinearSolvers_Module + * \brief A GMRES solver for sparse square problems + * + * This class allows to solve for A.x = b sparse linear problems using a generalized minimal + * residual method. The vectors x and b can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. + * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * + * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() + * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations + * and NumTraits::epsilon() for the tolerance. + * + * This class can be used as the direct solver classes. Here is a typical usage example: + * \code + * int n = 10000; + * VectorXd x(n), b(n); + * SparseMatrix A(n,n); + * // fill A and b + * GMRES > solver(A); + * x = solver.solve(b); + * std::cout << "#iterations: " << solver.iterations() << std::endl; + * std::cout << "estimated error: " << solver.error() << std::endl; + * // update b, and solve again + * x = solver.solve(b); + * \endcode + * + * By default the iterations start with x=0 as an initial guess of the solution. + * One can control the start using the solveWithGuess() method. + * + * GMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. 
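+ *
+ * The restart length trades memory for convergence: each cycle stores a basis of
+ * restart+1 vectors, so a smaller value saves memory but may slow convergence.
+ * A minimal sketch (assuming a double-valued system; the default restart is 30):
+ * \code
+ * int n = 10000;
+ * VectorXd x(n), b(n);
+ * SparseMatrix<double> A(n,n);
+ * // fill A and b
+ * GMRES<SparseMatrix<double> > solver(A);
+ * solver.set_restart(50); // keep a larger Krylov basis per restart cycle
+ * x = solver.solve(b);
+ * \endcode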
+ * + * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner + */ +template< typename _MatrixType, typename _Preconditioner> +class GMRES : public IterativeSolverBase > +{ + typedef IterativeSolverBase Base; + using Base::matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; + +private: + Index m_restart; + +public: + using Base::_solve_impl; + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + +public: + + /** Default constructor. */ + GMRES() : Base(), m_restart(30) {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + * + * This constructor is a shortcut for the default constructor followed + * by a call to compute(). + * + * \warning this class stores a reference to the matrix A as well as some + * precomputed values that depend on it. Therefore, if \a A is changed + * this class becomes invalid. Call compute() to update it with the new + * matrix A, or modify a copy of A. + */ + template + explicit GMRES(const EigenBase& A) : Base(A.derived()), m_restart(30) {} + + ~GMRES() {} + + /** Get the number of iterations after that a restart is performed. + */ + Index get_restart() { return m_restart; } + + /** Set the number of iterations after that a restart is performed. + * \param restart number of iterations for a restarti, default is 30. + */ + void set_restart(const Index restart) { m_restart=restart; } + + /** \internal */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + bool ret = internal::gmres(matrix(), b, x, Base::m_preconditioner, m_iterations, m_restart, m_error); + m_info = (!ret) ? NumericalIssue + : m_error <= Base::m_tolerance ? Success + : NoConvergence; + } + +protected: + +}; + +} // end namespace Eigen + +#endif // EIGEN_GMRES_H diff --git a/external/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/external/unsupported/Eigen/src/IterativeSolvers/IDRS.h new file mode 100644 index 0000000..90d20fa --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/IDRS.h @@ -0,0 +1,436 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_IDRS_H +#define EIGEN_IDRS_H + +namespace Eigen +{ + + namespace internal + { + /** \internal Low-level Induced Dimension Reduction algoritm + \param A The matrix A + \param b The right hand side vector b + \param x On input and initial solution, on output the computed solution. + \param precond A preconditioner being able to efficiently solve for an + approximation of Ax=b (regardless of b) + \param iter On input the max number of iteration, on output the number of performed iterations. + \param relres On input the tolerance error, on output an estimation of the relative error. + \param S On input Number of the dimension of the shadow space. + \param smoothing switches residual smoothing on. 
+ \param angle small omega lead to faster convergence at the expense of numerical stability + \param replacement switches on a residual replacement strategy to increase accuracy of residual at the expense of more Mat*vec products + \return false in the case of numerical issue, for example a break down of IDRS. + */ + template + typename Vector::Scalar omega(const Vector& t, const Vector& s, RealScalar angle) + { + using numext::abs; + typedef typename Vector::Scalar Scalar; + const RealScalar ns = s.norm(); + const RealScalar nt = t.norm(); + const Scalar ts = t.dot(s); + const RealScalar rho = abs(ts / (nt * ns)); + + if (rho < angle) { + if (ts == Scalar(0)) { + return Scalar(0); + } + // Original relation for om is given by + // om = om * angle / rho; + // To alleviate potential (near) division by zero this can be rewritten as + // om = angle * (ns / nt) * (ts / abs(ts)) = angle * (ns / nt) * sgn(ts) + return angle * (ns / nt) * (ts / abs(ts)); + } + return ts / (nt * nt); + } + + template + bool idrs(const MatrixType& A, const Rhs& b, Dest& x, const Preconditioner& precond, + Index& iter, + typename Dest::RealScalar& relres, Index S, bool smoothing, typename Dest::RealScalar angle, bool replacement) + { + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + const Index N = b.size(); + S = S < x.rows() ? S : x.rows(); + const RealScalar tol = relres; + const Index maxit = iter; + + Index replacements = 0; + bool trueres = false; + + FullPivLU lu_solver; + + DenseMatrixType P; + { + HouseholderQR qr(DenseMatrixType::Random(N, S)); + P = (qr.householderQ() * DenseMatrixType::Identity(N, S)); + } + + const RealScalar normb = b.norm(); + + if (internal::isApprox(normb, RealScalar(0))) + { + //Solution is the zero vector + x.setZero(); + iter = 0; + relres = 0; + return true; + } + // from http://homepage.tudelft.nl/1w5b5/IDRS/manual.pdf + // A peak in the residual is considered dangerously high if‖ri‖/‖b‖> C(tol/epsilon). + // With epsilon the + // relative machine precision. The factor tol/epsilon corresponds to the size of a + // finite precision number that is so large that the absolute round-off error in + // this number, when propagated through the process, makes it impossible to + // achieve the required accuracy.The factor C accounts for the accumulation of + // round-off errors. This parameter has beenset to 10−3. + // mp is epsilon/C + // 10^3 * eps is very conservative, so normally no residual replacements will take place. + // It only happens if things go very wrong. Too many restarts may ruin the convergence. 
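+		// mp is the replacement threshold eps/C described in the note above
+		// (C = 1e-3, hence the factor 1e3 in front of the machine epsilon)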
+ const RealScalar mp = RealScalar(1e3) * NumTraits::epsilon(); + + + + //Compute initial residual + const RealScalar tolb = tol * normb; //Relative tolerance + VectorType r = b - A * x; + + VectorType x_s, r_s; + + if (smoothing) + { + x_s = x; + r_s = r; + } + + RealScalar normr = r.norm(); + + if (normr <= tolb) + { + //Initial guess is a good enough solution + iter = 0; + relres = normr / normb; + return true; + } + + DenseMatrixType G = DenseMatrixType::Zero(N, S); + DenseMatrixType U = DenseMatrixType::Zero(N, S); + DenseMatrixType M = DenseMatrixType::Identity(S, S); + VectorType t(N), v(N); + Scalar om = 1.; + + //Main iteration loop, guild G-spaces: + iter = 0; + + while (normr > tolb && iter < maxit) + { + //New right hand size for small system: + VectorType f = (r.adjoint() * P).adjoint(); + + for (Index k = 0; k < S; ++k) + { + //Solve small system and make v orthogonal to P: + //c = M(k:s,k:s)\f(k:s); + lu_solver.compute(M.block(k , k , S -k, S - k )); + VectorType c = lu_solver.solve(f.segment(k , S - k )); + //v = r - G(:,k:s)*c; + v = r - G.rightCols(S - k ) * c; + //Preconditioning + v = precond.solve(v); + + //Compute new U(:,k) and G(:,k), G(:,k) is in space G_j + U.col(k) = U.rightCols(S - k ) * c + om * v; + G.col(k) = A * U.col(k ); + + //Bi-Orthogonalise the new basis vectors: + for (Index i = 0; i < k-1 ; ++i) + { + //alpha = ( P(:,i)'*G(:,k) )/M(i,i); + Scalar alpha = P.col(i ).dot(G.col(k )) / M(i, i ); + G.col(k ) = G.col(k ) - alpha * G.col(i ); + U.col(k ) = U.col(k ) - alpha * U.col(i ); + } + + //New column of M = P'*G (first k-1 entries are zero) + //M(k:s,k) = (G(:,k)'*P(:,k:s))'; + M.block(k , k , S - k , 1) = (G.col(k ).adjoint() * P.rightCols(S - k )).adjoint(); + + if (internal::isApprox(M(k,k), Scalar(0))) + { + return false; + } + + //Make r orthogonal to q_i, i = 0..k-1 + Scalar beta = f(k ) / M(k , k ); + r = r - beta * G.col(k ); + x = x + beta * U.col(k ); + normr = r.norm(); + + if (replacement && normr > tolb / mp) + { + trueres = true; + } + + //Smoothing: + if (smoothing) + { + t = r_s - r; + //gamma is a Scalar, but the conversion is not allowed + Scalar gamma = t.dot(r_s) / t.norm(); + r_s = r_s - gamma * t; + x_s = x_s - gamma * (x_s - x); + normr = r_s.norm(); + } + + if (normr < tolb || iter == maxit) + { + break; + } + + //New f = P'*r (first k components are zero) + if (k < S-1) + { + f.segment(k + 1, S - (k + 1) ) = f.segment(k + 1 , S - (k + 1)) - beta * M.block(k + 1 , k , S - (k + 1), 1); + } + }//end for + + if (normr < tolb || iter == maxit) + { + break; + } + + //Now we have sufficient vectors in G_j to compute residual in G_j+1 + //Note: r is already perpendicular to P so v = r + //Preconditioning + v = r; + v = precond.solve(v); + + //Matrix-vector multiplication: + t = A * v; + + //Computation of a new omega + om = internal::omega(t, r, angle); + + if (om == RealScalar(0.0)) + { + return false; + } + + r = r - om * t; + x = x + om * v; + normr = r.norm(); + + if (replacement && normr > tolb / mp) + { + trueres = true; + } + + //Residual replacement? 
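+					// when flagged, recompute the true residual r = b - A*x below to undo
+					// accumulated drift in the recursively updated residual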
+ if (trueres && normr < normb) + { + r = b - A * x; + trueres = false; + replacements++; + } + + //Smoothing: + if (smoothing) + { + t = r_s - r; + Scalar gamma = t.dot(r_s) /t.norm(); + r_s = r_s - gamma * t; + x_s = x_s - gamma * (x_s - x); + normr = r_s.norm(); + } + + iter++; + + }//end while + + if (smoothing) + { + x = x_s; + } + relres=normr/normb; + return true; + } + + } // namespace internal + + template > + class IDRS; + + namespace internal + { + + template + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } // namespace internal + + +/** \ingroup IterativeLinearSolvers_Module + * \brief The Induced Dimension Reduction method (IDR(s)) is a short-recurrences Krylov method for sparse square problems. + * + * This class allows to solve for A.x = b sparse linear problems. The vectors x and b can be either dense or sparse. + * he Induced Dimension Reduction method, IDR(), is a robust and efficient short-recurrence Krylov subspace method for + * solving large nonsymmetric systems of linear equations. + * + * For indefinite systems IDR(S) outperforms both BiCGStab and BiCGStab(L). Additionally, IDR(S) can handle matrices + * with complex eigenvalues more efficiently than BiCGStab. + * + * Many problems that do not converge for BiCGSTAB converge for IDR(s) (for larger values of s). And if both methods + * converge the convergence for IDR(s) is typically much faster for difficult systems (for example indefinite problems). + * + * IDR(s) is a limited memory finite termination method. In exact arithmetic it converges in at most N+N/s iterations, + * with N the system size. It uses a fixed number of 4+3s vector. In comparison, BiCGSTAB terminates in 2N iterations + * and uses 7 vectors. GMRES terminates in at most N iterations, and uses I+3 vectors, with I the number of iterations. + * Restarting GMRES limits the memory consumption, but destroys the finite termination property. + * + * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. + * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * + * \implsparsesolverconcept + * + * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() + * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations + * and NumTraits::epsilon() for the tolerance. + * + * The tolerance corresponds to the relative residual error: |Ax-b|/|b| + * + * \b Performance: when using sparse matrices, best performance is achied for a row-major sparse matrix format. + * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled. + * See \ref TopicMultiThreading for details. + * + * By default the iterations start with x=0 as an initial guess of the solution. + * One can control the start using the solveWithGuess() method. + * + * IDR(s) can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. 
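+ *
+ * A minimal usage sketch (assuming a double-valued sparse system; setS() is optional
+ * and controls the shadow-space dimension, which defaults to 4):
+ * \code
+ * int n = 10000;
+ * VectorXd x(n), b(n);
+ * SparseMatrix<double> A(n,n);
+ * // fill A and b
+ * IDRS<SparseMatrix<double> > solver(A);
+ * solver.setS(8); // a larger shadow space typically needs fewer iterations
+ * x = solver.solve(b);
+ * std::cout << "#iterations: " << solver.iterations() << std::endl;
+ * \endcode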
+ * + * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner + */ + template + class IDRS : public IterativeSolverBase > + { + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + private: + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_S; + bool m_smoothing; + RealScalar m_angle; + bool m_residual; + + public: + /** Default constructor. */ + IDRS(): m_S(4), m_smoothing(false), m_angle(RealScalar(0.7)), m_residual(false) {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit IDRS(const EigenBase& A) : Base(A.derived()), m_S(4), m_smoothing(false), + m_angle(RealScalar(0.7)), m_residual(false) {} + + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerence and maxIterations + 2. Calls the function that has the core solver routine + */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::idrs(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_S,m_smoothing,m_angle,m_residual); + + m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; + } + + /** Sets the parameter S, indicating the dimension of the shadow space. Default is 4*/ + void setS(Index S) + { + if (S < 1) + { + S = 4; + } + + m_S = S; + } + + /** Switches off and on smoothing. + Residual smoothing results in monotonically decreasing residual norms at + the expense of two extra vectors of storage and a few extra vector + operations. Although monotonic decrease of the residual norms is a + desirable property, the rate of convergence of the unsmoothed process and + the smoothed process is basically the same. Default is off */ + void setSmoothing(bool smoothing) + { + m_smoothing=smoothing; + } + + /** The angle must be a real scalar. In IDR(s), a value for the + iteration parameter omega must be chosen in every s+1th step. The most + natural choice is to select a value to minimize the norm of the next residual. + This corresponds to the parameter omega = 0. In practice, this may lead to + values of omega that are so small that the other iteration parameters + cannot be computed with sufficient accuracy. In such cases it is better to + increase the value of omega sufficiently such that a compromise is reached + between accurate computations and reduction of the residual norm. The + parameter angle =0.7 (”maintaining the convergence strategy”) + results in such a compromise. */ + void setAngle(RealScalar angle) + { + m_angle=angle; + } + + /** The parameter replace is a logical that determines whether a + residual replacement strategy is employed to increase the accuracy of the + solution. 
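+			Default is off.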
*/ + void setResidualUpdate(bool update) + { + m_residual=update; + } + + }; + +} // namespace Eigen + +#endif /* EIGEN_IDRS_H */ diff --git a/external/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h b/external/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h new file mode 100644 index 0000000..7d08c35 --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h @@ -0,0 +1,90 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INCOMPLETE_LU_H +#define EIGEN_INCOMPLETE_LU_H + +namespace Eigen { + +template +class IncompleteLU : public SparseSolverBase > +{ + protected: + typedef SparseSolverBase > Base; + using Base::m_isInitialized; + + typedef _Scalar Scalar; + typedef Matrix Vector; + typedef typename Vector::Index Index; + typedef SparseMatrix FactorType; + + public: + typedef Matrix MatrixType; + + IncompleteLU() {} + + template + IncompleteLU(const MatrixType& mat) + { + compute(mat); + } + + Index rows() const { return m_lu.rows(); } + Index cols() const { return m_lu.cols(); } + + template + IncompleteLU& compute(const MatrixType& mat) + { + m_lu = mat; + int size = mat.cols(); + Vector diag(size); + for(int i=0; i + void _solve_impl(const Rhs& b, Dest& x) const + { + x = m_lu.template triangularView().solve(b); + x = m_lu.template triangularView().solve(x); + } + + protected: + FactorType m_lu; +}; + +} // end namespace Eigen + +#endif // EIGEN_INCOMPLETE_LU_H diff --git a/external/unsupported/Eigen/src/IterativeSolvers/IterationController.h b/external/unsupported/Eigen/src/IterativeSolvers/IterationController.h new file mode 100644 index 0000000..a116e09 --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/IterationController.h @@ -0,0 +1,154 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud + +/* NOTE The class IterationController has been adapted from the iteration + * class of the GMM++ and ITL libraries. + */ + +//======================================================================= +// Copyright (C) 1997-2001 +// Authors: Andrew Lumsdaine +// Lie-Quan Lee +// +// This file is part of the Iterative Template Library +// +// You should have received a copy of the License Agreement for the +// Iterative Template Library along with the software; see the +// file LICENSE. +// +// Permission to modify the code and to distribute modified code is +// granted, provided the text of this NOTICE is retained, a notice that +// the code was modified is included with the above COPYRIGHT NOTICE and +// with the COPYRIGHT NOTICE in the LICENSE file, and that the LICENSE +// file is distributed with the modified code. +// +// LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. +// By way of example, but not limitation, Licensor MAKES NO +// REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY +// PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE COMPONENTS +// OR DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS +// OR OTHER RIGHTS. 
+//======================================================================= + +//======================================================================== +// +// Copyright (C) 2002-2007 Yves Renard +// +// This file is a part of GETFEM++ +// +// Getfem++ is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; version 2.1 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// You should have received a copy of the GNU Lesser General Public +// License along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +// USA. +// +//======================================================================== + +#include "../../../../Eigen/src/Core/util/NonMPL2.h" + +#ifndef EIGEN_ITERATION_CONTROLLER_H +#define EIGEN_ITERATION_CONTROLLER_H + +namespace Eigen { + +/** \ingroup IterativeLinearSolvers_Module + * \class IterationController + * + * \brief Controls the iterations of the iterative solvers + * + * This class has been adapted from the iteration class of GMM++ and ITL libraries. + * + */ +class IterationController +{ + protected : + double m_rhsn; ///< Right hand side norm + size_t m_maxiter; ///< Max. number of iterations + int m_noise; ///< if noise > 0 iterations are printed + double m_resmax; ///< maximum residual + double m_resminreach, m_resadd; + size_t m_nit; ///< iteration number + double m_res; ///< last computed residual + bool m_written; + void (*m_callback)(const IterationController&); + public : + + void init() + { + m_nit = 0; m_res = 0.0; m_written = false; + m_resminreach = 1E50; m_resadd = 0.0; + m_callback = 0; + } + + IterationController(double r = 1.0E-8, int noi = 0, size_t mit = size_t(-1)) + : m_rhsn(1.0), m_maxiter(mit), m_noise(noi), m_resmax(r) { init(); } + + void operator ++(int) { m_nit++; m_written = false; m_resadd += m_res; } + void operator ++() { (*this)++; } + + bool first() { return m_nit == 0; } + + /* get/set the "noisyness" (verbosity) of the solvers */ + int noiseLevel() const { return m_noise; } + void setNoiseLevel(int n) { m_noise = n; } + void reduceNoiseLevel() { if (m_noise > 0) m_noise--; } + + double maxResidual() const { return m_resmax; } + void setMaxResidual(double r) { m_resmax = r; } + + double residual() const { return m_res; } + + /* change the user-definable callback, called after each iteration */ + void setCallback(void (*t)(const IterationController&)) + { + m_callback = t; + } + + size_t iteration() const { return m_nit; } + void setIteration(size_t i) { m_nit = i; } + + size_t maxIterarions() const { return m_maxiter; } + void setMaxIterations(size_t i) { m_maxiter = i; } + + double rhsNorm() const { return m_rhsn; } + void setRhsNorm(double r) { m_rhsn = r; } + + bool converged() const { return m_res <= m_rhsn * m_resmax; } + bool converged(double nr) + { + using std::abs; + m_res = abs(nr); + m_resminreach = (std::min)(m_resminreach, m_res); + return converged(); + } + template bool converged(const VectorType &v) + { return converged(v.squaredNorm()); } + + bool finished(double nr) + { + if (m_callback) m_callback(*this); + if (m_noise > 0 && !m_written) + { + converged(nr); + m_written = true; + } + return (m_nit >= m_maxiter 
|| converged(nr)); + } + template + bool finished(const MatrixBase &v) + { return finished(double(v.squaredNorm())); } + +}; + +} // end namespace Eigen + +#endif // EIGEN_ITERATION_CONTROLLER_H diff --git a/external/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/external/unsupported/Eigen/src/IterativeSolvers/MINRES.h new file mode 100644 index 0000000..5db454d --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/MINRES.h @@ -0,0 +1,267 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Giacomo Po +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2018 David Hyde +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_MINRES_H_ +#define EIGEN_MINRES_H_ + + +namespace Eigen { + + namespace internal { + + /** \internal Low-level MINRES algorithm + * \param mat The matrix A + * \param rhs The right hand side vector b + * \param x On input and initial solution, on output the computed solution. + * \param precond A right preconditioner being able to efficiently solve for an + * approximation of Ax=b (regardless of b) + * \param iters On input the max number of iteration, on output the number of performed iterations. + * \param tol_error On input the tolerance error, on output an estimation of the relative error. + */ + template + EIGEN_DONT_INLINE + void minres(const MatrixType& mat, const Rhs& rhs, Dest& x, + const Preconditioner& precond, Index& iters, + typename Dest::RealScalar& tol_error) + { + using std::sqrt; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + typedef Matrix VectorType; + + // Check for zero rhs + const RealScalar rhsNorm2(rhs.squaredNorm()); + if(rhsNorm2 == 0) + { + x.setZero(); + iters = 0; + tol_error = 0; + return; + } + + // initialize + const Index maxIters(iters); // initialize maxIters to iters + const Index N(mat.cols()); // the size of the matrix + const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2) + + // Initialize preconditioned Lanczos + VectorType v_old(N); // will be initialized inside loop + VectorType v( VectorType::Zero(N) ); //initialize v + VectorType v_new(rhs-mat*x); //initialize v_new + RealScalar residualNorm2(v_new.squaredNorm()); + VectorType w(N); // will be initialized inside loop + VectorType w_new(precond.solve(v_new)); // initialize w_new +// RealScalar beta; // will be initialized inside loop + RealScalar beta_new2(v_new.dot(w_new)); + eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); + RealScalar beta_new(sqrt(beta_new2)); + const RealScalar beta_one(beta_new); + // Initialize other variables + RealScalar c(1.0); // the cosine of the Givens rotation + RealScalar c_old(1.0); + RealScalar s(0.0); // the sine of the Givens rotation + RealScalar s_old(0.0); // the sine of the Givens rotation + VectorType p_oold(N); // will be initialized in loop + VectorType p_old(VectorType::Zero(N)); // initialize p_old=0 + VectorType p(p_old); // initialize p=0 + RealScalar eta(1.0); + + iters = 0; // reset iters + while ( iters < maxIters ) + { + // Preconditioned Lanczos + /* Note that there are 4 variants on the Lanczos algorithm. These are + * described in Paige, C. C. (1972). Computational variants of + * the Lanczos method for the eigenproblem. 
IMA Journal of Applied + * Mathematics, 10(3), 373-381. The current implementation corresponds + * to the case A(2,7) in the paper. It also corresponds to + * algorithm 6.14 in Y. Saad, Iterative Methods for Sparse Linear + * Systems, 2003 p.173. For the preconditioned version see + * A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987). + */ + const RealScalar beta(beta_new); + v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter + v_new /= beta_new; // overwrite v_new for next iteration + w_new /= beta_new; // overwrite w_new for next iteration + v = v_new; // update + w = w_new; // update + v_new.noalias() = mat*w - beta*v_old; // compute v_new + const RealScalar alpha = v_new.dot(w); + v_new -= alpha*v; // overwrite v_new + w_new = precond.solve(v_new); // overwrite w_new + beta_new2 = v_new.dot(w_new); // compute beta_new + eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); + beta_new = sqrt(beta_new2); // compute beta_new + + // Givens rotation + const RealScalar r2 =s*alpha+c*c_old*beta; // s, s_old, c and c_old are still from previous iteration + const RealScalar r3 =s_old*beta; // s, s_old, c and c_old are still from previous iteration + const RealScalar r1_hat=c*alpha-c_old*s*beta; + const RealScalar r1 =sqrt( std::pow(r1_hat,2) + std::pow(beta_new,2) ); + c_old = c; // store for next iteration + s_old = s; // store for next iteration + c=r1_hat/r1; // new cosine + s=beta_new/r1; // new sine + + // Update solution + p_oold = p_old; + p_old = p; + p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED? + x += beta_one*c*eta*p; + + /* Update the squared residual. Note that this is the estimated residual. + The real residual |Ax-b|^2 may be slightly larger */ + residualNorm2 *= s*s; + + if ( residualNorm2 < threshold2) + { + break; + } + + eta=-s*eta; // update eta + iters++; // increment iteration number (for output purposes) + } + + /* Compute error. Note that this is the estimated error. The real + error |Ax-b|/|b| may be slightly larger */ + tol_error = std::sqrt(residualNorm2 / rhsNorm2); + } + + } + + template< typename _MatrixType, int _UpLo=Lower, + typename _Preconditioner = IdentityPreconditioner> + class MINRES; + + namespace internal { + + template< typename _MatrixType, int _UpLo, typename _Preconditioner> + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } + + /** \ingroup IterativeLinearSolvers_Module + * \brief A minimal residual solver for sparse symmetric problems + * + * This class allows to solve for A.x = b sparse linear problems using the MINRES algorithm + * of Paige and Saunders (1975). The sparse matrix A must be symmetric (possibly indefinite). + * The vectors x and b can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower, + * Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower. + * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner + * + * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() + * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations + * and NumTraits::epsilon() for the tolerance. + * + * This class can be used as the direct solver classes. 
Here is a typical usage example: + * \code + * int n = 10000; + * VectorXd x(n), b(n); + * SparseMatrix A(n,n); + * // fill A and b + * MINRES > mr; + * mr.compute(A); + * x = mr.solve(b); + * std::cout << "#iterations: " << mr.iterations() << std::endl; + * std::cout << "estimated error: " << mr.error() << std::endl; + * // update b, and solve again + * x = mr.solve(b); + * \endcode + * + * By default the iterations start with x=0 as an initial guess of the solution. + * One can control the start using the solveWithGuess() method. + * + * MINRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * + * \sa class ConjugateGradient, BiCGSTAB, SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner + */ + template< typename _MatrixType, int _UpLo, typename _Preconditioner> + class MINRES : public IterativeSolverBase > + { + + typedef IterativeSolverBase Base; + using Base::matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; + public: + using Base::_solve_impl; + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + enum {UpLo = _UpLo}; + + public: + + /** Default constructor. */ + MINRES() : Base() {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + * + * This constructor is a shortcut for the default constructor followed + * by a call to compute(). + * + * \warning this class stores a reference to the matrix A as well as some + * precomputed values that depend on it. Therefore, if \a A is changed + * this class becomes invalid. Call compute() to update it with the new + * matrix A, or modify a copy of A. + */ + template + explicit MINRES(const EigenBase& A) : Base(A.derived()) {} + + /** Destructor. */ + ~MINRES(){} + + /** \internal */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + typedef typename Base::MatrixWrapper MatrixWrapper; + typedef typename Base::ActualMatrixType ActualMatrixType; + enum { + TransposeInput = (!MatrixWrapper::MatrixFree) + && (UpLo==(Lower|Upper)) + && (!MatrixType::IsRowMajor) + && (!NumTraits::IsComplex) + }; + typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); + typedef typename internal::conditional::Type + >::type SelfAdjointWrapper; + + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + RowMajorWrapper row_mat(matrix()); + internal::minres(SelfAdjointWrapper(row_mat), b, x, + Base::m_preconditioner, m_iterations, m_error); + m_info = m_error <= Base::m_tolerance ? Success : NoConvergence; + } + + protected: + + }; + +} // end namespace Eigen + +#endif // EIGEN_MINRES_H diff --git a/external/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/external/unsupported/Eigen/src/IterativeSolvers/Scaling.h new file mode 100644 index 0000000..9b3eb53 --- /dev/null +++ b/external/unsupported/Eigen/src/IterativeSolvers/Scaling.h @@ -0,0 +1,193 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire NUENTSA WAKAM A; + * // fill A and b; + * IterScaling > scal; + * // Compute the left and right scaling vectors. 
The matrix is equilibrated at output + * scal.computeRef(A); + * // Scale the right hand side + * b = scal.LeftScaling().cwiseProduct(b); + * // Now, solve the equilibrated linear system with any available solver + * + * // Scale back the computed solution + * x = scal.RightScaling().cwiseProduct(x); + * \endcode + * + * \tparam _MatrixType the type of the matrix. It should be a real square sparsematrix + * + * References : D. Ruiz and B. Ucar, A Symmetry Preserving Algorithm for Matrix Scaling, INRIA Research report RR-7552 + * + * \sa \ref IncompleteLUT + */ +template +class IterScaling +{ + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + + public: + IterScaling() { init(); } + + IterScaling(const MatrixType& matrix) + { + init(); + compute(matrix); + } + + ~IterScaling() { } + + /** + * Compute the left and right diagonal matrices to scale the input matrix @p mat + * + * FIXME This algorithm will be modified such that the diagonal elements are permuted on the diagonal. + * + * \sa LeftScaling() RightScaling() + */ + void compute (const MatrixType& mat) + { + using std::abs; + int m = mat.rows(); + int n = mat.cols(); + eigen_assert((m>0 && m == n) && "Please give a non - empty matrix"); + m_left.resize(m); + m_right.resize(n); + m_left.setOnes(); + m_right.setOnes(); + m_matrix = mat; + VectorXd Dr, Dc, DrRes, DcRes; // Temporary Left and right scaling vectors + Dr.resize(m); Dc.resize(n); + DrRes.resize(m); DcRes.resize(n); + double EpsRow = 1.0, EpsCol = 1.0; + int its = 0; + do + { // Iterate until the infinite norm of each row and column is approximately 1 + // Get the maximum value in each row and column + Dr.setZero(); Dc.setZero(); + for (int k=0; km_tol || EpsCol > m_tol) && (its < m_maxits) ); + m_isInitialized = true; + } + /** Compute the left and right vectors to scale the vectors + * the input matrix is scaled with the computed vectors at output + * + * \sa compute() + */ + void computeRef (MatrixType& mat) + { + compute (mat); + mat = m_matrix; + } + /** Get the vector to scale the rows of the matrix + */ + VectorXd& LeftScaling() + { + return m_left; + } + + /** Get the vector to scale the columns of the matrix + */ + VectorXd& RightScaling() + { + return m_right; + } + + /** Set the tolerance for the convergence of the iterative scaling algorithm + */ + void setTolerance(double tol) + { + m_tol = tol; + } + + protected: + + void init() + { + m_tol = 1e-10; + m_maxits = 5; + m_isInitialized = false; + } + + MatrixType m_matrix; + mutable ComputationInfo m_info; + bool m_isInitialized; + VectorXd m_left; // Left scaling vector + VectorXd m_right; // m_right scaling vector + double m_tol; + int m_maxits; // Maximum number of iterations allowed +}; +} +#endif diff --git a/external/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/external/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h new file mode 100644 index 0000000..6a9b0be --- /dev/null +++ b/external/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -0,0 +1,305 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Kolja Brix +// Copyright (C) 2011 Andreas Platen +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef KRONECKER_TENSOR_PRODUCT_H +#define KRONECKER_TENSOR_PRODUCT_H + +namespace Eigen { + +/*! + * \ingroup KroneckerProduct_Module + * + * \brief The base class of dense and sparse Kronecker product. + * + * \tparam Derived is the derived type. + */ +template +class KroneckerProductBase : public ReturnByValue +{ + private: + typedef typename internal::traits Traits; + typedef typename Traits::Scalar Scalar; + + protected: + typedef typename Traits::Lhs Lhs; + typedef typename Traits::Rhs Rhs; + + public: + /*! \brief Constructor. */ + KroneckerProductBase(const Lhs& A, const Rhs& B) + : m_A(A), m_B(B) + {} + + inline Index rows() const { return m_A.rows() * m_B.rows(); } + inline Index cols() const { return m_A.cols() * m_B.cols(); } + + /*! + * This overrides ReturnByValue::coeff because this function is + * efficient enough. + */ + Scalar coeff(Index row, Index col) const + { + return m_A.coeff(row / m_B.rows(), col / m_B.cols()) * + m_B.coeff(row % m_B.rows(), col % m_B.cols()); + } + + /*! + * This overrides ReturnByValue::coeff because this function is + * efficient enough. + */ + Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + return m_A.coeff(i / m_A.size()) * m_B.coeff(i % m_A.size()); + } + + protected: + typename Lhs::Nested m_A; + typename Rhs::Nested m_B; +}; + +/*! + * \ingroup KroneckerProduct_Module + * + * \brief Kronecker tensor product helper class for dense matrices + * + * This class is the return value of kroneckerProduct(MatrixBase, + * MatrixBase). Use the function rather than construct this class + * directly to avoid specifying template prarameters. + * + * \tparam Lhs Type of the left-hand side, a matrix expression. + * \tparam Rhs Type of the rignt-hand side, a matrix expression. + */ +template +class KroneckerProduct : public KroneckerProductBase > +{ + private: + typedef KroneckerProductBase Base; + using Base::m_A; + using Base::m_B; + + public: + /*! \brief Constructor. */ + KroneckerProduct(const Lhs& A, const Rhs& B) + : Base(A, B) + {} + + /*! \brief Evaluate the Kronecker tensor product. */ + template void evalTo(Dest& dst) const; +}; + +/*! + * \ingroup KroneckerProduct_Module + * + * \brief Kronecker tensor product helper class for sparse matrices + * + * If at least one of the operands is a sparse matrix expression, + * then this class is returned and evaluates into a sparse matrix. + * + * This class is the return value of kroneckerProduct(EigenBase, + * EigenBase). Use the function rather than construct this class + * directly to avoid specifying template prarameters. + * + * \tparam Lhs Type of the left-hand side, a matrix expression. + * \tparam Rhs Type of the rignt-hand side, a matrix expression. + */ +template +class KroneckerProductSparse : public KroneckerProductBase > +{ + private: + typedef KroneckerProductBase Base; + using Base::m_A; + using Base::m_B; + + public: + /*! \brief Constructor. */ + KroneckerProductSparse(const Lhs& A, const Rhs& B) + : Base(A, B) + {} + + /*! \brief Evaluate the Kronecker tensor product. 
*/ + template void evalTo(Dest& dst) const; +}; + +template +template +void KroneckerProduct::evalTo(Dest& dst) const +{ + const int BlockRows = Rhs::RowsAtCompileTime, + BlockCols = Rhs::ColsAtCompileTime; + const Index Br = m_B.rows(), + Bc = m_B.cols(); + for (Index i=0; i < m_A.rows(); ++i) + for (Index j=0; j < m_A.cols(); ++j) + Block(dst,i*Br,j*Bc,Br,Bc) = m_A.coeff(i,j) * m_B; +} + +template +template +void KroneckerProductSparse::evalTo(Dest& dst) const +{ + Index Br = m_B.rows(), Bc = m_B.cols(); + dst.resize(this->rows(), this->cols()); + dst.resizeNonZeros(0); + + // 1 - evaluate the operands if needed: + typedef typename internal::nested_eval::type Lhs1; + typedef typename internal::remove_all::type Lhs1Cleaned; + const Lhs1 lhs1(m_A); + typedef typename internal::nested_eval::type Rhs1; + typedef typename internal::remove_all::type Rhs1Cleaned; + const Rhs1 rhs1(m_B); + + // 2 - construct respective iterators + typedef Eigen::InnerIterator LhsInnerIterator; + typedef Eigen::InnerIterator RhsInnerIterator; + + // compute number of non-zeros per innervectors of dst + { + // TODO VectorXi is not necessarily big enough! + VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols()); + for (Index kA=0; kA < m_A.outerSize(); ++kA) + for (LhsInnerIterator itA(lhs1,kA); itA; ++itA) + nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++; + + VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols()); + for (Index kB=0; kB < m_B.outerSize(); ++kB) + for (RhsInnerIterator itB(rhs1,kB); itB; ++itB) + nnzB(Dest::IsRowMajor ? itB.row() : itB.col())++; + + Matrix nnzAB = nnzB * nnzA.transpose(); + dst.reserve(VectorXi::Map(nnzAB.data(), nnzAB.size())); + } + + for (Index kA=0; kA < m_A.outerSize(); ++kA) + { + for (Index kB=0; kB < m_B.outerSize(); ++kB) + { + for (LhsInnerIterator itA(lhs1,kA); itA; ++itA) + { + for (RhsInnerIterator itB(rhs1,kB); itB; ++itB) + { + Index i = itA.row() * Br + itB.row(), + j = itA.col() * Bc + itB.col(); + dst.insert(i,j) = itA.value() * itB.value(); + } + } + } + } +} + +namespace internal { + +template +struct traits > +{ + typedef typename remove_all<_Lhs>::type Lhs; + typedef typename remove_all<_Rhs>::type Rhs; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + typedef typename promote_index_type::type StorageIndex; + + enum { + Rows = size_at_compile_time::RowsAtCompileTime, traits::RowsAtCompileTime>::ret, + Cols = size_at_compile_time::ColsAtCompileTime, traits::ColsAtCompileTime>::ret, + MaxRows = size_at_compile_time::MaxRowsAtCompileTime, traits::MaxRowsAtCompileTime>::ret, + MaxCols = size_at_compile_time::MaxColsAtCompileTime, traits::MaxColsAtCompileTime>::ret + }; + + typedef Matrix ReturnType; +}; + +template +struct traits > +{ + typedef MatrixXpr XprKind; + typedef typename remove_all<_Lhs>::type Lhs; + typedef typename remove_all<_Rhs>::type Rhs; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + typedef typename cwise_promote_storage_type::StorageKind, typename traits::StorageKind, scalar_product_op >::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + LhsFlags = Lhs::Flags, + RhsFlags = Rhs::Flags, + + RowsAtCompileTime = size_at_compile_time::RowsAtCompileTime, traits::RowsAtCompileTime>::ret, + ColsAtCompileTime = size_at_compile_time::ColsAtCompileTime, traits::ColsAtCompileTime>::ret, + MaxRowsAtCompileTime = size_at_compile_time::MaxRowsAtCompileTime, traits::MaxRowsAtCompileTime>::ret, + MaxColsAtCompileTime = 
size_at_compile_time::MaxColsAtCompileTime, traits::MaxColsAtCompileTime>::ret, + + EvalToRowMajor = (int(LhsFlags) & int(RhsFlags) & RowMajorBit), + RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), + + Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & RemovedBits) + | EvalBeforeNestingBit, + CoeffReadCost = HugeCost + }; + + typedef SparseMatrix ReturnType; +}; + +} // end namespace internal + +/*! + * \ingroup KroneckerProduct_Module + * + * Computes Kronecker tensor product of two dense matrices + * + * \warning If you want to replace a matrix by its Kronecker product + * with some matrix, do \b NOT do this: + * \code + * A = kroneckerProduct(A,B); // bug!!! caused by aliasing effect + * \endcode + * instead, use eval() to work around this: + * \code + * A = kroneckerProduct(A,B).eval(); + * \endcode + * + * \param a Dense matrix a + * \param b Dense matrix b + * \return Kronecker tensor product of a and b + */ +template +KroneckerProduct kroneckerProduct(const MatrixBase& a, const MatrixBase& b) +{ + return KroneckerProduct(a.derived(), b.derived()); +} + +/*! + * \ingroup KroneckerProduct_Module + * + * Computes Kronecker tensor product of two matrices, at least one of + * which is sparse + * + * \warning If you want to replace a matrix by its Kronecker product + * with some matrix, do \b NOT do this: + * \code + * A = kroneckerProduct(A,B); // bug!!! caused by aliasing effect + * \endcode + * instead, use eval() to work around this: + * \code + * A = kroneckerProduct(A,B).eval(); + * \endcode + * + * \param a Dense/sparse matrix a + * \param b Dense/sparse matrix b + * \return Kronecker tensor product of a and b, stored in a sparse + * matrix + */ +template +KroneckerProductSparse kroneckerProduct(const EigenBase& a, const EigenBase& b) +{ + return KroneckerProductSparse(a.derived(), b.derived()); +} + +} // end namespace Eigen + +#endif // KRONECKER_TENSOR_PRODUCT_H diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt b/external/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt new file mode 100644 index 0000000..ae7984d --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt @@ -0,0 +1,52 @@ +Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND. 
THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. + diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h b/external/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h new file mode 100644 index 0000000..b75bea2 --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This code initially comes from MINPACK whose original authors are: +// Copyright Jorge More - Argonne National Laboratory +// Copyright Burt Garbow - Argonne National Laboratory +// Copyright Ken Hillstrom - Argonne National Laboratory +// +// This Source Code Form is subject to the terms of the Minpack license +// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. + +#ifndef EIGEN_LMCOVAR_H +#define EIGEN_LMCOVAR_H + +namespace Eigen { + +namespace internal { + +template +void covar( + Matrix< Scalar, Dynamic, Dynamic > &r, + const VectorXi& ipvt, + Scalar tol = std::sqrt(NumTraits::epsilon()) ) +{ + using std::abs; + /* Local variables */ + Index i, j, k, l, ii, jj; + bool sing; + Scalar temp; + + /* Function Body */ + const Index n = r.cols(); + const Scalar tolr = tol * abs(r(0,0)); + Matrix< Scalar, Dynamic, 1 > wa(n); + eigen_assert(ipvt.size()==n); + + /* form the inverse of r in the full upper triangle of r. */ + l = -1; + for (k = 0; k < n; ++k) + if (abs(r(k,k)) > tolr) { + r(k,k) = 1. / r(k,k); + for (j = 0; j <= k-1; ++j) { + temp = r(k,k) * r(j,k); + r(j,k) = 0.; + r.col(k).head(j+1) -= r.col(j).head(j+1) * temp; + } + l = k; + } + + /* form the full upper triangle of the inverse of (r transpose)*r */ + /* in the full upper triangle of r. */ + for (k = 0; k <= l; ++k) { + for (j = 0; j <= k-1; ++j) + r.col(j).head(j+1) += r.col(k).head(j+1) * r(j,k); + r.col(k).head(k+1) *= r(k,k); + } + + /* form the full lower triangle of the covariance matrix */ + /* in the strict lower triangle of r and in wa. */ + for (j = 0; j < n; ++j) { + jj = ipvt[j]; + sing = j > l; + for (i = 0; i <= j; ++i) { + if (sing) + r(i,j) = 0.; + ii = ipvt[i]; + if (ii > jj) + r(ii,jj) = r(i,j); + if (ii < jj) + r(jj,ii) = r(i,j); + } + wa[jj] = r(j,j); + } + + /* symmetrize the covariance matrix in r. 
*/ + r.topLeftCorner(n,n).template triangularView() = r.topLeftCorner(n,n).transpose(); + r.diagonal() = wa; +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LMCOVAR_H diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h b/external/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h new file mode 100644 index 0000000..25b32ec --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h @@ -0,0 +1,202 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This code initially comes from MINPACK whose original authors are: +// Copyright Jorge More - Argonne National Laboratory +// Copyright Burt Garbow - Argonne National Laboratory +// Copyright Ken Hillstrom - Argonne National Laboratory +// +// This Source Code Form is subject to the terms of the Minpack license +// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. + +#ifndef EIGEN_LMONESTEP_H +#define EIGEN_LMONESTEP_H + +namespace Eigen { + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeOneStep(FVectorType &x) +{ + using std::abs; + using std::sqrt; + RealScalar temp, temp1,temp2; + RealScalar ratio; + RealScalar pnorm, xnorm, fnorm1, actred, dirder, prered; + eigen_assert(x.size()==n); // check the caller is not cheating us + + temp = 0.0; xnorm = 0.0; + /* calculate the jacobian matrix. */ + Index df_ret = m_functor.df(x, m_fjac); + if (df_ret<0) + return LevenbergMarquardtSpace::UserAsked; + if (df_ret>0) + // numerical diff, we evaluated the function df_ret times + m_nfev += df_ret; + else m_njev++; + + /* compute the qr factorization of the jacobian. */ + for (int j = 0; j < x.size(); ++j) + m_wa2(j) = m_fjac.col(j).blueNorm(); + QRSolver qrfac(m_fjac); + if(qrfac.info() != Success) { + m_info = NumericalIssue; + return LevenbergMarquardtSpace::ImproperInputParameters; + } + // Make a copy of the first factor with the associated permutation + m_rfactor = qrfac.matrixR(); + m_permutation = (qrfac.colsPermutation()); + + /* on the first iteration and if external scaling is not used, scale according */ + /* to the norms of the columns of the initial jacobian. */ + if (m_iter == 1) { + if (!m_useExternalScaling) + for (Index j = 0; j < n; ++j) + m_diag[j] = (m_wa2[j]==0.)? 1. : m_wa2[j]; + + /* on the first iteration, calculate the norm of the scaled x */ + /* and initialize the step bound m_delta. */ + xnorm = m_diag.cwiseProduct(x).stableNorm(); + m_delta = m_factor * xnorm; + if (m_delta == 0.) + m_delta = m_factor; + } + + /* form (q transpose)*m_fvec and store the first n components in */ + /* m_qtf. */ + m_wa4 = m_fvec; + m_wa4 = qrfac.matrixQ().adjoint() * m_fvec; + m_qtf = m_wa4.head(n); + + /* compute the norm of the scaled gradient. */ + m_gnorm = 0.; + if (m_fnorm != 0.) + for (Index j = 0; j < n; ++j) + if (m_wa2[m_permutation.indices()[j]] != 0.) + m_gnorm = (std::max)(m_gnorm, abs( m_rfactor.col(j).head(j+1).dot(m_qtf.head(j+1)/m_fnorm) / m_wa2[m_permutation.indices()[j]])); + + /* test for convergence of the gradient norm. */ + if (m_gnorm <= m_gtol) { + m_info = Success; + return LevenbergMarquardtSpace::CosinusTooSmall; + } + + /* rescale if necessary. */ + if (!m_useExternalScaling) + m_diag = m_diag.cwiseMax(m_wa2); + + do { + /* determine the levenberg-marquardt parameter. */ + internal::lmpar2(qrfac, m_diag, m_qtf, m_delta, m_par, m_wa1); + + /* store the direction p and x + p. calculate the norm of p. 
*/ + m_wa1 = -m_wa1; + m_wa2 = x + m_wa1; + pnorm = m_diag.cwiseProduct(m_wa1).stableNorm(); + + /* on the first iteration, adjust the initial step bound. */ + if (m_iter == 1) + m_delta = (std::min)(m_delta,pnorm); + + /* evaluate the function at x + p and calculate its norm. */ + if ( m_functor(m_wa2, m_wa4) < 0) + return LevenbergMarquardtSpace::UserAsked; + ++m_nfev; + fnorm1 = m_wa4.stableNorm(); + + /* compute the scaled actual reduction. */ + actred = -1.; + if (Scalar(.1) * fnorm1 < m_fnorm) + actred = 1. - numext::abs2(fnorm1 / m_fnorm); + + /* compute the scaled predicted reduction and */ + /* the scaled directional derivative. */ + m_wa3 = m_rfactor.template triangularView() * (m_permutation.inverse() *m_wa1); + temp1 = numext::abs2(m_wa3.stableNorm() / m_fnorm); + temp2 = numext::abs2(sqrt(m_par) * pnorm / m_fnorm); + prered = temp1 + temp2 / Scalar(.5); + dirder = -(temp1 + temp2); + + /* compute the ratio of the actual to the predicted */ + /* reduction. */ + ratio = 0.; + if (prered != 0.) + ratio = actred / prered; + + /* update the step bound. */ + if (ratio <= Scalar(.25)) { + if (actred >= 0.) + temp = RealScalar(.5); + if (actred < 0.) + temp = RealScalar(.5) * dirder / (dirder + RealScalar(.5) * actred); + if (RealScalar(.1) * fnorm1 >= m_fnorm || temp < RealScalar(.1)) + temp = Scalar(.1); + /* Computing MIN */ + m_delta = temp * (std::min)(m_delta, pnorm / RealScalar(.1)); + m_par /= temp; + } else if (!(m_par != 0. && ratio < RealScalar(.75))) { + m_delta = pnorm / RealScalar(.5); + m_par = RealScalar(.5) * m_par; + } + + /* test for successful iteration. */ + if (ratio >= RealScalar(1e-4)) { + /* successful iteration. update x, m_fvec, and their norms. */ + x = m_wa2; + m_wa2 = m_diag.cwiseProduct(x); + m_fvec = m_wa4; + xnorm = m_wa2.stableNorm(); + m_fnorm = fnorm1; + ++m_iter; + } + + /* tests for convergence. */ + if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1. && m_delta <= m_xtol * xnorm) + { + m_info = Success; + return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; + } + if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1.) + { + m_info = Success; + return LevenbergMarquardtSpace::RelativeReductionTooSmall; + } + if (m_delta <= m_xtol * xnorm) + { + m_info = Success; + return LevenbergMarquardtSpace::RelativeErrorTooSmall; + } + + /* tests for termination and stringent tolerances. */ + if (m_nfev >= m_maxfev) + { + m_info = NoConvergence; + return LevenbergMarquardtSpace::TooManyFunctionEvaluation; + } + if (abs(actred) <= NumTraits::epsilon() && prered <= NumTraits::epsilon() && Scalar(.5) * ratio <= 1.) + { + m_info = Success; + return LevenbergMarquardtSpace::FtolTooSmall; + } + if (m_delta <= NumTraits::epsilon() * xnorm) + { + m_info = Success; + return LevenbergMarquardtSpace::XtolTooSmall; + } + if (m_gnorm <= NumTraits::epsilon()) + { + m_info = Success; + return LevenbergMarquardtSpace::GtolTooSmall; + } + + } while (ratio < Scalar(1e-4)); + + return LevenbergMarquardtSpace::Running; +} + + +} // end namespace Eigen + +#endif // EIGEN_LMONESTEP_H diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h b/external/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h new file mode 100644 index 0000000..9a48365 --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This code initially comes from MINPACK whose original authors are: +// Copyright Jorge More - Argonne National Laboratory +// Copyright Burt Garbow - Argonne National Laboratory +// Copyright Ken Hillstrom - Argonne National Laboratory +// +// This Source Code Form is subject to the terms of the Minpack license +// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. + +#ifndef EIGEN_LMPAR_H +#define EIGEN_LMPAR_H + +namespace Eigen { + +namespace internal { + + template + void lmpar2( + const QRSolver &qr, + const VectorType &diag, + const VectorType &qtb, + typename VectorType::Scalar m_delta, + typename VectorType::Scalar &par, + VectorType &x) + + { + using std::sqrt; + using std::abs; + typedef typename QRSolver::MatrixType MatrixType; + typedef typename QRSolver::Scalar Scalar; +// typedef typename QRSolver::StorageIndex StorageIndex; + + /* Local variables */ + Index j; + Scalar fp; + Scalar parc, parl; + Index iter; + Scalar temp, paru; + Scalar gnorm; + Scalar dxnorm; + + // Make a copy of the triangular factor. + // This copy is modified during call the qrsolv + MatrixType s; + s = qr.matrixR(); + + /* Function Body */ + const Scalar dwarf = (std::numeric_limits::min)(); + const Index n = qr.matrixR().cols(); + eigen_assert(n==diag.size()); + eigen_assert(n==qtb.size()); + + VectorType wa1, wa2; + + /* compute and store in x the gauss-newton direction. if the */ + /* jacobian is rank-deficient, obtain a least squares solution. */ + + // const Index rank = qr.nonzeroPivots(); // exactly double(0.) + const Index rank = qr.rank(); // use a threshold + wa1 = qtb; + wa1.tail(n-rank).setZero(); + //FIXME There is no solve in place for sparse triangularView + wa1.head(rank) = s.topLeftCorner(rank,rank).template triangularView().solve(qtb.head(rank)); + + x = qr.colsPermutation()*wa1; + + /* initialize the iteration counter. */ + /* evaluate the function at the origin, and test */ + /* for acceptance of the gauss-newton direction. */ + iter = 0; + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + fp = dxnorm - m_delta; + if (fp <= Scalar(0.1) * m_delta) { + par = 0; + return; + } + + /* if the jacobian is not rank deficient, the newton */ + /* step provides a lower bound, parl, for the zero of */ + /* the function. otherwise set this bound to zero. */ + parl = 0.; + if (rank==n) { + wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2)/dxnorm; + s.topLeftCorner(n,n).transpose().template triangularView().solveInPlace(wa1); + temp = wa1.blueNorm(); + parl = fp / m_delta / temp / temp; + } + + /* calculate an upper bound, paru, for the zero of the function. */ + for (j = 0; j < n; ++j) + wa1[j] = s.col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)]; + + gnorm = wa1.stableNorm(); + paru = gnorm / m_delta; + if (paru == 0.) + paru = dwarf / (std::min)(m_delta,Scalar(0.1)); + + /* if the input par lies outside of the interval (parl,paru), */ + /* set par to the closer endpoint. */ + par = (std::max)(par,parl); + par = (std::min)(par,paru); + if (par == 0.) + par = gnorm / dxnorm; + + /* beginning of an iteration. */ + while (true) { + ++iter; + + /* evaluate the function at the current value of par. */ + if (par == 0.) 
+ par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ + wa1 = sqrt(par)* diag; + + VectorType sdiag(n); + lmqrsolv(s, qr.colsPermutation(), wa1, qtb, x, sdiag); + + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + temp = fp; + fp = dxnorm - m_delta; + + /* if the function is small enough, accept the current value */ + /* of par. also test for the exceptional cases where parl */ + /* is zero or the number of iterations has reached 10. */ + if (abs(fp) <= Scalar(0.1) * m_delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) + break; + + /* compute the newton correction. */ + wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2/dxnorm); + // we could almost use this here, but the diagonal is outside qr, in sdiag[] + for (j = 0; j < n; ++j) { + wa1[j] /= sdiag[j]; + temp = wa1[j]; + for (Index i = j+1; i < n; ++i) + wa1[i] -= s.coeff(i,j) * temp; + } + temp = wa1.blueNorm(); + parc = fp / m_delta / temp / temp; + + /* depending on the sign of the function, update parl or paru. */ + if (fp > 0.) + parl = (std::max)(parl,par); + if (fp < 0.) + paru = (std::min)(paru,par); + + /* compute an improved estimate for par. */ + par = (std::max)(parl,par+parc); + } + if (iter == 0) + par = 0.; + return; + } +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LMPAR_H diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/external/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h new file mode 100644 index 0000000..1234858 --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h @@ -0,0 +1,188 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// Copyright (C) 2012 Desire Nuentsa +// +// This code initially comes from MINPACK whose original authors are: +// Copyright Jorge More - Argonne National Laboratory +// Copyright Burt Garbow - Argonne National Laboratory +// Copyright Ken Hillstrom - Argonne National Laboratory +// +// This Source Code Form is subject to the terms of the Minpack license +// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. + +#ifndef EIGEN_LMQRSOLV_H +#define EIGEN_LMQRSOLV_H + +namespace Eigen { + +namespace internal { + +template +void lmqrsolv( + Matrix &s, + const PermutationMatrix &iPerm, + const Matrix &diag, + const Matrix &qtb, + Matrix &x, + Matrix &sdiag) +{ + /* Local variables */ + Index i, j, k; + Scalar temp; + Index n = s.cols(); + Matrix wa(n); + JacobiRotation givens; + + /* Function Body */ + // the following will only change the lower triangular part of s, including + // the diagonal, though the diagonal is restored afterward + + /* copy r and (q transpose)*b to preserve input and initialize s. */ + /* in particular, save the diagonal elements of r in x. */ + x = s.diagonal(); + wa = qtb; + + + s.topLeftCorner(n,n).template triangularView() = s.topLeftCorner(n,n).transpose(); + /* eliminate the diagonal matrix d using a givens rotation. */ + for (j = 0; j < n; ++j) { + + /* prepare the row of d to be eliminated, locating the */ + /* diagonal element using p from the qr factorization. */ + const PermIndex l = iPerm.indices()(j); + if (diag[l] == 0.) + break; + sdiag.tail(n-j).setZero(); + sdiag[j] = diag[l]; + + /* the transformations to eliminate the row of d */ + /* modify only a single element of (q transpose)*b */ + /* beyond the first n, which is initially zero. 
*/ + Scalar qtbpj = 0.; + for (k = j; k < n; ++k) { + /* determine a givens rotation which eliminates the */ + /* appropriate element in the current row of d. */ + givens.makeGivens(-s(k,k), sdiag[k]); + + /* compute the modified diagonal element of r and */ + /* the modified element of ((q transpose)*b,0). */ + s(k,k) = givens.c() * s(k,k) + givens.s() * sdiag[k]; + temp = givens.c() * wa[k] + givens.s() * qtbpj; + qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; + wa[k] = temp; + + /* accumulate the transformation in the row of s. */ + for (i = k+1; i().solveInPlace(wa.head(nsing)); + + // restore + sdiag = s.diagonal(); + s.diagonal() = x; + + /* permute the components of z back to components of x. */ + x = iPerm * wa; +} + +template +void lmqrsolv( + SparseMatrix &s, + const PermutationMatrix &iPerm, + const Matrix &diag, + const Matrix &qtb, + Matrix &x, + Matrix &sdiag) +{ + /* Local variables */ + typedef SparseMatrix FactorType; + Index i, j, k, l; + Scalar temp; + Index n = s.cols(); + Matrix wa(n); + JacobiRotation givens; + + /* Function Body */ + // the following will only change the lower triangular part of s, including + // the diagonal, though the diagonal is restored afterward + + /* copy r and (q transpose)*b to preserve input and initialize R. */ + wa = qtb; + FactorType R(s); + // Eliminate the diagonal matrix d using a givens rotation + for (j = 0; j < n; ++j) + { + // Prepare the row of d to be eliminated, locating the + // diagonal element using p from the qr factorization + l = iPerm.indices()(j); + if (diag(l) == Scalar(0)) + break; + sdiag.tail(n-j).setZero(); + sdiag[j] = diag[l]; + // the transformations to eliminate the row of d + // modify only a single element of (q transpose)*b + // beyond the first n, which is initially zero. + + Scalar qtbpj = 0; + // Browse the nonzero elements of row j of the upper triangular s + for (k = j; k < n; ++k) + { + typename FactorType::InnerIterator itk(R,k); + for (; itk; ++itk){ + if (itk.index() < k) continue; + else break; + } + //At this point, we have the diagonal element R(k,k) + // Determine a givens rotation which eliminates + // the appropriate element in the current row of d + givens.makeGivens(-itk.value(), sdiag(k)); + + // Compute the modified diagonal element of r and + // the modified element of ((q transpose)*b,0). + itk.valueRef() = givens.c() * itk.value() + givens.s() * sdiag(k); + temp = givens.c() * wa(k) + givens.s() * qtbpj; + qtbpj = -givens.s() * wa(k) + givens.c() * qtbpj; + wa(k) = temp; + + // Accumulate the transformation in the remaining k row/column of R + for (++itk; itk; ++itk) + { + i = itk.index(); + temp = givens.c() * itk.value() + givens.s() * sdiag(i); + sdiag(i) = -givens.s() * itk.value() + givens.c() * sdiag(i); + itk.valueRef() = temp; + } + } + } + + // Solve the triangular system for z. 
If the system is + // singular, then obtain a least squares solution + Index nsing; + for(nsing = 0; nsing().solve/*InPlace*/(wa.head(nsing)); + + sdiag = R.diagonal(); + // Permute the components of z back to components of x + x = iPerm * wa; +} +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LMQRSOLV_H diff --git a/external/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h b/external/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h new file mode 100644 index 0000000..62561da --- /dev/null +++ b/external/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h @@ -0,0 +1,396 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// Copyright (C) 2012 Desire Nuentsa +// +// The algorithm of this class initially comes from MINPACK whose original authors are: +// Copyright Jorge More - Argonne National Laboratory +// Copyright Burt Garbow - Argonne National Laboratory +// Copyright Ken Hillstrom - Argonne National Laboratory +// +// This Source Code Form is subject to the terms of the Minpack license +// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LEVENBERGMARQUARDT_H +#define EIGEN_LEVENBERGMARQUARDT_H + + +namespace Eigen { +namespace LevenbergMarquardtSpace { + enum Status { + NotStarted = -2, + Running = -1, + ImproperInputParameters = 0, + RelativeReductionTooSmall = 1, + RelativeErrorTooSmall = 2, + RelativeErrorAndReductionTooSmall = 3, + CosinusTooSmall = 4, + TooManyFunctionEvaluation = 5, + FtolTooSmall = 6, + XtolTooSmall = 7, + GtolTooSmall = 8, + UserAsked = 9 + }; +} + +template +struct DenseFunctor +{ + typedef _Scalar Scalar; + enum { + InputsAtCompileTime = NX, + ValuesAtCompileTime = NY + }; + typedef Matrix InputType; + typedef Matrix ValueType; + typedef Matrix JacobianType; + typedef ColPivHouseholderQR QRSolver; + const int m_inputs, m_values; + + DenseFunctor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {} + DenseFunctor(int inputs, int values) : m_inputs(inputs), m_values(values) {} + + int inputs() const { return m_inputs; } + int values() const { return m_values; } + + //int operator()(const InputType &x, ValueType& fvec) { } + // should be defined in derived classes + + //int df(const InputType &x, JacobianType& fjac) { } + // should be defined in derived classes +}; + +template +struct SparseFunctor +{ + typedef _Scalar Scalar; + typedef _Index Index; + typedef Matrix InputType; + typedef Matrix ValueType; + typedef SparseMatrix JacobianType; + typedef SparseQR > QRSolver; + enum { + InputsAtCompileTime = Dynamic, + ValuesAtCompileTime = Dynamic + }; + + SparseFunctor(int inputs, int values) : m_inputs(inputs), m_values(values) {} + + int inputs() const { return m_inputs; } + int values() const { return m_values; } + + const int m_inputs, m_values; + //int operator()(const InputType &x, ValueType& fvec) { } + // to be defined in the functor + + //int df(const InputType &x, JacobianType& fjac) { } + // to be defined in the functor if no automatic differentiation + +}; +namespace internal { +template +void lmpar2(const QRSolver &qr, const VectorType &diag, const VectorType &qtb, + typename VectorType::Scalar m_delta, typename VectorType::Scalar &par, + 
VectorType &x); + } +/** + * \ingroup NonLinearOptimization_Module + * \brief Performs non linear optimization over a non-linear function, + * using a variant of the Levenberg Marquardt algorithm. + * + * Check wikipedia for more information. + * http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm + */ +template +class LevenbergMarquardt : internal::no_assignment_operator +{ + public: + typedef _FunctorType FunctorType; + typedef typename FunctorType::QRSolver QRSolver; + typedef typename FunctorType::JacobianType JacobianType; + typedef typename JacobianType::Scalar Scalar; + typedef typename JacobianType::RealScalar RealScalar; + typedef typename QRSolver::StorageIndex PermIndex; + typedef Matrix FVectorType; + typedef PermutationMatrix PermutationType; + public: + LevenbergMarquardt(FunctorType& functor) + : m_functor(functor),m_nfev(0),m_njev(0),m_fnorm(0.0),m_gnorm(0), + m_isInitialized(false),m_info(InvalidInput) + { + resetParameters(); + m_useExternalScaling=false; + } + + LevenbergMarquardtSpace::Status minimize(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeInit(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeOneStep(FVectorType &x); + LevenbergMarquardtSpace::Status lmder1( + FVectorType &x, + const Scalar tol = std::sqrt(NumTraits::epsilon()) + ); + static LevenbergMarquardtSpace::Status lmdif1( + FunctorType &functor, + FVectorType &x, + Index *nfev, + const Scalar tol = std::sqrt(NumTraits::epsilon()) + ); + + /** Sets the default parameters */ + void resetParameters() + { + using std::sqrt; + + m_factor = 100.; + m_maxfev = 400; + m_ftol = sqrt(NumTraits::epsilon()); + m_xtol = sqrt(NumTraits::epsilon()); + m_gtol = 0. ; + m_epsfcn = 0. ; + } + + /** Sets the tolerance for the norm of the solution vector*/ + void setXtol(RealScalar xtol) { m_xtol = xtol; } + + /** Sets the tolerance for the norm of the vector function*/ + void setFtol(RealScalar ftol) { m_ftol = ftol; } + + /** Sets the tolerance for the norm of the gradient of the error vector*/ + void setGtol(RealScalar gtol) { m_gtol = gtol; } + + /** Sets the step bound for the diagonal shift */ + void setFactor(RealScalar factor) { m_factor = factor; } + + /** Sets the error precision */ + void setEpsilon (RealScalar epsfcn) { m_epsfcn = epsfcn; } + + /** Sets the maximum number of function evaluation */ + void setMaxfev(Index maxfev) {m_maxfev = maxfev; } + + /** Use an external Scaling. 
If set to true, pass a nonzero diagonal to diag() */ + void setExternalScaling(bool value) {m_useExternalScaling = value; } + + /** \returns the tolerance for the norm of the solution vector */ + RealScalar xtol() const {return m_xtol; } + + /** \returns the tolerance for the norm of the vector function */ + RealScalar ftol() const {return m_ftol; } + + /** \returns the tolerance for the norm of the gradient of the error vector */ + RealScalar gtol() const {return m_gtol; } + + /** \returns the step bound for the diagonal shift */ + RealScalar factor() const {return m_factor; } + + /** \returns the error precision */ + RealScalar epsilon() const {return m_epsfcn; } + + /** \returns the maximum number of function evaluation */ + Index maxfev() const {return m_maxfev; } + + /** \returns a reference to the diagonal of the jacobian */ + FVectorType& diag() {return m_diag; } + + /** \returns the number of iterations performed */ + Index iterations() { return m_iter; } + + /** \returns the number of functions evaluation */ + Index nfev() { return m_nfev; } + + /** \returns the number of jacobian evaluation */ + Index njev() { return m_njev; } + + /** \returns the norm of current vector function */ + RealScalar fnorm() {return m_fnorm; } + + /** \returns the norm of the gradient of the error */ + RealScalar gnorm() {return m_gnorm; } + + /** \returns the LevenbergMarquardt parameter */ + RealScalar lm_param(void) { return m_par; } + + /** \returns a reference to the current vector function + */ + FVectorType& fvec() {return m_fvec; } + + /** \returns a reference to the matrix where the current Jacobian matrix is stored + */ + JacobianType& jacobian() {return m_fjac; } + + /** \returns a reference to the triangular matrix R from the QR of the jacobian matrix. 
+ * \sa jacobian() + */ + JacobianType& matrixR() {return m_rfactor; } + + /** the permutation used in the QR factorization + */ + PermutationType permutation() {return m_permutation; } + + /** + * \brief Reports whether the minimization was successful + * \returns \c Success if the minimization was successful, + * \c NumericalIssue if a numerical problem arises during the + * minimization process, for example during the QR factorization + * \c NoConvergence if the minimization did not converge after + * the maximum number of function evaluation allowed + * \c InvalidInput if the input matrix is invalid + */ + ComputationInfo info() const + { + + return m_info; + } + private: + JacobianType m_fjac; + JacobianType m_rfactor; // The triangular matrix R from the QR of the jacobian matrix m_fjac + FunctorType &m_functor; + FVectorType m_fvec, m_qtf, m_diag; + Index n; + Index m; + Index m_nfev; + Index m_njev; + RealScalar m_fnorm; // Norm of the current vector function + RealScalar m_gnorm; //Norm of the gradient of the error + RealScalar m_factor; // + Index m_maxfev; // Maximum number of function evaluation + RealScalar m_ftol; //Tolerance in the norm of the vector function + RealScalar m_xtol; // + RealScalar m_gtol; //tolerance of the norm of the error gradient + RealScalar m_epsfcn; // + Index m_iter; // Number of iterations performed + RealScalar m_delta; + bool m_useExternalScaling; + PermutationType m_permutation; + FVectorType m_wa1, m_wa2, m_wa3, m_wa4; //Temporary vectors + RealScalar m_par; + bool m_isInitialized; // Check whether the minimization step has been called + ComputationInfo m_info; +}; + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimize(FVectorType &x) +{ + LevenbergMarquardtSpace::Status status = minimizeInit(x); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) { + m_isInitialized = true; + return status; + } + do { +// std::cout << " uv " << x.transpose() << "\n"; + status = minimizeOneStep(x); + } while (status==LevenbergMarquardtSpace::Running); + m_isInitialized = true; + return status; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeInit(FVectorType &x) +{ + n = x.size(); + m = m_functor.values(); + + m_wa1.resize(n); m_wa2.resize(n); m_wa3.resize(n); + m_wa4.resize(m); + m_fvec.resize(m); + //FIXME Sparse Case : Allocate space for the jacobian + m_fjac.resize(m, n); +// m_fjac.reserve(VectorXi::Constant(n,5)); // FIXME Find a better alternative + if (!m_useExternalScaling) + m_diag.resize(n); + eigen_assert( (!m_useExternalScaling || m_diag.size()==n) && "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'"); + m_qtf.resize(n); + + /* Function Body */ + m_nfev = 0; + m_njev = 0; + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || m_ftol < 0. || m_xtol < 0. || m_gtol < 0. || m_maxfev <= 0 || m_factor <= 0.){ + m_info = InvalidInput; + return LevenbergMarquardtSpace::ImproperInputParameters; + } + + if (m_useExternalScaling) + for (Index j = 0; j < n; ++j) + if (m_diag[j] <= 0.) + { + m_info = InvalidInput; + return LevenbergMarquardtSpace::ImproperInputParameters; + } + + /* evaluate the function at the starting point */ + /* and calculate its norm. */ + m_nfev = 1; + if ( m_functor(x, m_fvec) < 0) + return LevenbergMarquardtSpace::UserAsked; + m_fnorm = m_fvec.stableNorm(); + + /* initialize levenberg-marquardt parameter and iteration counter. 
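To make the API above concrete, here is a minimal, illustrative driver (the functor, include path, tolerances and starting point are assumptions, not part of this patch). It phrases Rosenbrock's function as two residuals, r0 = 10*(x1 - x0^2) and r1 = 1 - x0, whose least-squares minimum is at (1, 1):

#include <unsupported/Eigen/LevenbergMarquardt>
#include <iostream>

struct RosenbrockFunctor : Eigen::DenseFunctor<double>
{
  RosenbrockFunctor() : Eigen::DenseFunctor<double>(2, 2) {}   // 2 inputs, 2 residuals

  // Residual vector; a negative return value would abort the solver (UserAsked).
  int operator()(const InputType &x, ValueType &fvec) const
  {
    fvec(0) = 10.0 * (x(1) - x(0) * x(0));
    fvec(1) = 1.0 - x(0);
    return 0;
  }

  // Analytic Jacobian of the residuals.
  int df(const InputType &x, JacobianType &fjac) const
  {
    fjac(0, 0) = -20.0 * x(0);  fjac(0, 1) = 10.0;
    fjac(1, 0) = -1.0;          fjac(1, 1) = 0.0;
    return 0;
  }
};

int main()
{
  RosenbrockFunctor functor;
  Eigen::LevenbergMarquardt<RosenbrockFunctor> lm(functor);
  lm.setFtol(1e-12);
  lm.setXtol(1e-12);

  Eigen::VectorXd x(2);
  x << -1.2, 1.0;                                   // classic starting point
  Eigen::LevenbergMarquardtSpace::Status status = lm.minimize(x);

  std::cout << "status " << status
            << ", iterations " << lm.iterations()
            << ", x = " << x.transpose() << std::endl;
  return 0;
}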
*/ + m_par = 0.; + m_iter = 1; + + return LevenbergMarquardtSpace::NotStarted; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::lmder1( + FVectorType &x, + const Scalar tol + ) +{ + n = x.size(); + m = m_functor.values(); + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || tol < 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + resetParameters(); + m_ftol = tol; + m_xtol = tol; + m_maxfev = 100*(n+1); + + return minimize(x); +} + + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::lmdif1( + FunctorType &functor, + FVectorType &x, + Index *nfev, + const Scalar tol + ) +{ + Index n = x.size(); + Index m = functor.values(); + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || tol < 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + NumericalDiff numDiff(functor); + // embedded LevenbergMarquardt + LevenbergMarquardt > lm(numDiff); + lm.setFtol(tol); + lm.setXtol(tol); + lm.setMaxfev(200*(n+1)); + + LevenbergMarquardtSpace::Status info = LevenbergMarquardtSpace::Status(lm.minimize(x)); + if (nfev) + * nfev = lm.nfev(); + return info; +} + +} // end namespace Eigen + +#endif // EIGEN_LEVENBERGMARQUARDT_H diff --git a/external/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/external/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h new file mode 100644 index 0000000..02284b0 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h @@ -0,0 +1,441 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009, 2010, 2013 Jitse Niesen +// Copyright (C) 2011, 2013 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_EXPONENTIAL +#define EIGEN_MATRIX_EXPONENTIAL + +#include "StemFunction.h" + +namespace Eigen { +namespace internal { + +/** \brief Scaling operator. + * + * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$. + */ +template +struct MatrixExponentialScalingOp +{ + /** \brief Constructor. + * + * \param[in] squarings The integer \f$ s \f$ in this document. + */ + MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) { } + + + /** \brief Scale a matrix coefficient. + * + * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. + */ + inline const RealScalar operator() (const RealScalar& x) const + { + using std::ldexp; + return ldexp(x, -m_squarings); + } + + typedef std::complex ComplexScalar; + + /** \brief Scale a matrix coefficient. + * + * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. + */ + inline const ComplexScalar operator() (const ComplexScalar& x) const + { + using std::ldexp; + return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings)); + } + + private: + int m_squarings; +}; + +/** \brief Compute the (3,3)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. 
+ */ +template +void matrix_exp_pade3(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {120.L, 60.L, 12.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType tmp = b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + V = b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); +} + +/** \brief Compute the (5,5)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. + */ +template +void matrix_exp_pade5(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {30240.L, 15120.L, 3360.L, 420.L, 30.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType A4 = A2 * A2; + const MatrixType tmp = b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + V = b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); +} + +/** \brief Compute the (7,7)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. + */ +template +void matrix_exp_pade7(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {17297280.L, 8648640.L, 1995840.L, 277200.L, 25200.L, 1512.L, 56.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType A4 = A2 * A2; + const MatrixType A6 = A4 * A2; + const MatrixType tmp = b[7] * A6 + b[5] * A4 + b[3] * A2 + + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + V = b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); + +} + +/** \brief Compute the (9,9)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. + */ +template +void matrix_exp_pade9(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {17643225600.L, 8821612800.L, 2075673600.L, 302702400.L, 30270240.L, + 2162160.L, 110880.L, 3960.L, 90.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType A4 = A2 * A2; + const MatrixType A6 = A4 * A2; + const MatrixType A8 = A6 * A2; + const MatrixType tmp = b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 + + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + V = b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); +} + +/** \brief Compute the (13,13)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. 
+ */ +template +void matrix_exp_pade13(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {64764752532480000.L, 32382376266240000.L, 7771770303897600.L, + 1187353796428800.L, 129060195264000.L, 10559470521600.L, 670442572800.L, + 33522128640.L, 1323241920.L, 40840800.L, 960960.L, 16380.L, 182.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType A4 = A2 * A2; + const MatrixType A6 = A4 * A2; + V = b[13] * A6 + b[11] * A4 + b[9] * A2; // used for temporary storage + MatrixType tmp = A6 * V; + tmp += b[7] * A6 + b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + tmp = b[12] * A6 + b[10] * A4 + b[8] * A2; + V.noalias() = A6 * tmp; + V += b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); +} + +/** \brief Compute the (17,17)-Padé approximant to the exponential. + * + * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé + * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. + * + * This function activates only if your long double is double-double or quadruple. + */ +#if LDBL_MANT_DIG > 64 +template +void matrix_exp_pade17(const MatA& A, MatU& U, MatV& V) +{ + typedef typename MatA::PlainObject MatrixType; + typedef typename NumTraits::Scalar>::Real RealScalar; + const RealScalar b[] = {830034394580628357120000.L, 415017197290314178560000.L, + 100610229646136770560000.L, 15720348382208870400000.L, + 1774878043152614400000.L, 153822763739893248000.L, 10608466464820224000.L, + 595373117923584000.L, 27563570274240000.L, 1060137318240000.L, + 33924394183680.L, 899510451840.L, 19554575040.L, 341863200.L, 4651200.L, + 46512.L, 306.L, 1.L}; + const MatrixType A2 = A * A; + const MatrixType A4 = A2 * A2; + const MatrixType A6 = A4 * A2; + const MatrixType A8 = A4 * A4; + V = b[17] * A8 + b[15] * A6 + b[13] * A4 + b[11] * A2; // used for temporary storage + MatrixType tmp = A8 * V; + tmp += b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 + + b[1] * MatrixType::Identity(A.rows(), A.cols()); + U.noalias() = A * tmp; + tmp = b[16] * A8 + b[14] * A6 + b[12] * A4 + b[10] * A2; + V.noalias() = tmp * A8; + V += b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 + + b[0] * MatrixType::Identity(A.rows(), A.cols()); +} +#endif + +template ::Scalar>::Real> +struct matrix_exp_computeUV +{ + /** \brief Compute Padé approximant to the exponential. + * + * Computes \c U, \c V and \c squarings such that \f$ (V+U)(V-U)^{-1} \f$ is a Padé + * approximant of \f$ \exp(2^{-\mbox{squarings}}M) \f$ around \f$ M = 0 \f$, where \f$ M \f$ + * denotes the matrix \c arg. The degree of the Padé approximant and the value of squarings + * are chosen such that the approximation error is no more than the round-off error. 
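A worked instance of this rule for double precision, using the thresholds listed in the specialization below: if the L1 norm of the argument is 10, then 10 / 5.3719... is about 1.86, frexp(1.86, &squarings) sets squarings = 1, and exp(A) is obtained by evaluating the (13,13) approximant at A/2 and squaring the result once.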
+ */ + static void run(const MatrixType& arg, MatrixType& U, MatrixType& V, int& squarings); +}; + +template +struct matrix_exp_computeUV +{ + template + static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) + { + using std::frexp; + using std::pow; + const float l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); + squarings = 0; + if (l1norm < 4.258730016922831e-001f) { + matrix_exp_pade3(arg, U, V); + } else if (l1norm < 1.880152677804762e+000f) { + matrix_exp_pade5(arg, U, V); + } else { + const float maxnorm = 3.925724783138660f; + frexp(l1norm / maxnorm, &squarings); + if (squarings < 0) squarings = 0; + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp(squarings)); + matrix_exp_pade7(A, U, V); + } + } +}; + +template +struct matrix_exp_computeUV +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + template + static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) + { + using std::frexp; + using std::pow; + const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); + squarings = 0; + if (l1norm < 1.495585217958292e-002) { + matrix_exp_pade3(arg, U, V); + } else if (l1norm < 2.539398330063230e-001) { + matrix_exp_pade5(arg, U, V); + } else if (l1norm < 9.504178996162932e-001) { + matrix_exp_pade7(arg, U, V); + } else if (l1norm < 2.097847961257068e+000) { + matrix_exp_pade9(arg, U, V); + } else { + const RealScalar maxnorm = 5.371920351148152; + frexp(l1norm / maxnorm, &squarings); + if (squarings < 0) squarings = 0; + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp(squarings)); + matrix_exp_pade13(A, U, V); + } + } +}; + +template +struct matrix_exp_computeUV +{ + template + static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) + { +#if LDBL_MANT_DIG == 53 // double precision + matrix_exp_computeUV::run(arg, U, V, squarings); + +#else + + using std::frexp; + using std::pow; + const long double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); + squarings = 0; + +#if LDBL_MANT_DIG <= 64 // extended precision + + if (l1norm < 4.1968497232266989671e-003L) { + matrix_exp_pade3(arg, U, V); + } else if (l1norm < 1.1848116734693823091e-001L) { + matrix_exp_pade5(arg, U, V); + } else if (l1norm < 5.5170388480686700274e-001L) { + matrix_exp_pade7(arg, U, V); + } else if (l1norm < 1.3759868875587845383e+000L) { + matrix_exp_pade9(arg, U, V); + } else { + const long double maxnorm = 4.0246098906697353063L; + frexp(l1norm / maxnorm, &squarings); + if (squarings < 0) squarings = 0; + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp(squarings)); + matrix_exp_pade13(A, U, V); + } + +#elif LDBL_MANT_DIG <= 106 // double-double + + if (l1norm < 3.2787892205607026992947488108213e-005L) { + matrix_exp_pade3(arg, U, V); + } else if (l1norm < 6.4467025060072760084130906076332e-003L) { + matrix_exp_pade5(arg, U, V); + } else if (l1norm < 6.8988028496595374751374122881143e-002L) { + matrix_exp_pade7(arg, U, V); + } else if (l1norm < 2.7339737518502231741495857201670e-001L) { + matrix_exp_pade9(arg, U, V); + } else if (l1norm < 1.3203382096514474905666448850278e+000L) { + matrix_exp_pade13(arg, U, V); + } else { + const long double maxnorm = 3.2579440895405400856599663723517L; + frexp(l1norm / maxnorm, &squarings); + if (squarings < 0) squarings = 0; + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp(squarings)); + matrix_exp_pade17(A, U, V); + } + +#elif LDBL_MANT_DIG <= 113 // quadruple precision + + if (l1norm < 1.639394610288918690547467954466970e-005L) { + 
matrix_exp_pade3(arg, U, V); + } else if (l1norm < 4.253237712165275566025884344433009e-003L) { + matrix_exp_pade5(arg, U, V); + } else if (l1norm < 5.125804063165764409885122032933142e-002L) { + matrix_exp_pade7(arg, U, V); + } else if (l1norm < 2.170000765161155195453205651889853e-001L) { + matrix_exp_pade9(arg, U, V); + } else if (l1norm < 1.125358383453143065081397882891878e+000L) { + matrix_exp_pade13(arg, U, V); + } else { + const long double maxnorm = 2.884233277829519311757165057717815L; + frexp(l1norm / maxnorm, &squarings); + if (squarings < 0) squarings = 0; + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp(squarings)); + matrix_exp_pade17(A, U, V); + } + +#else + + // this case should be handled in compute() + eigen_assert(false && "Bug in MatrixExponential"); + +#endif +#endif // LDBL_MANT_DIG + } +}; + +template struct is_exp_known_type : false_type {}; +template<> struct is_exp_known_type : true_type {}; +template<> struct is_exp_known_type : true_type {}; +#if LDBL_MANT_DIG <= 113 +template<> struct is_exp_known_type : true_type {}; +#endif + +template +void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // natively supported scalar type +{ + typedef typename ArgType::PlainObject MatrixType; + MatrixType U, V; + int squarings; + matrix_exp_computeUV::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V) + MatrixType numer = U + V; + MatrixType denom = -U + V; + result = denom.partialPivLu().solve(numer); + for (int i=0; i +void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default +{ + typedef typename ArgType::PlainObject MatrixType; + typedef typename traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + result = arg.matrixFunction(internal::stem_function_exp); +} + +} // end namespace Eigen::internal + +/** \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix exponential of some matrix (expression). + * + * \tparam Derived Type of the argument to the matrix exponential. + * + * This class holds the argument to the matrix exponential until it is assigned or evaluated for + * some other reason (so the argument should not be changed in the meantime). It is the return type + * of MatrixBase::exp() and most of the time this is the only way it is used. + */ +template struct MatrixExponentialReturnValue +: public ReturnByValue > +{ + public: + /** \brief Constructor. + * + * \param src %Matrix (expression) forming the argument of the matrix exponential. + */ + MatrixExponentialReturnValue(const Derived& src) : m_src(src) { } + + /** \brief Compute the matrix exponential. + * + * \param result the matrix exponential of \p src in the constructor. 
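A small, illustrative use of the resulting public entry point MatrixBase::exp(), defined at the end of this file (the include path is assumed; the matrix is chosen nilpotent so the expected result, I + A, is easy to check by hand):

#include <unsupported/Eigen/MatrixFunctions>
#include <iostream>

int main()
{
  Eigen::Matrix2d A;
  A << 0, 1,
       0, 0;                       // A*A = 0, hence exp(A) = I + A exactly
  Eigen::Matrix2d E = A.exp();     // the proxy above is evaluated on assignment
  std::cout << E << std::endl;     // expected: [1 1; 0 1]
  return 0;
}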
+ */ + template + inline void evalTo(ResultType& result) const + { + const typename internal::nested_eval::type tmp(m_src); + internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type()); + } + + Index rows() const { return m_src.rows(); } + Index cols() const { return m_src.cols(); } + + protected: + const typename internal::ref_selector::type m_src; +}; + +namespace internal { +template +struct traits > +{ + typedef typename Derived::PlainObject ReturnType; +}; +} + +template +const MatrixExponentialReturnValue MatrixBase::exp() const +{ + eigen_assert(rows() == cols()); + return MatrixExponentialReturnValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_EXPONENTIAL diff --git a/external/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/external/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h new file mode 100644 index 0000000..cc12ab6 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h @@ -0,0 +1,569 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2011, 2013 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_FUNCTION_H +#define EIGEN_MATRIX_FUNCTION_H + +#include "StemFunction.h" + + +namespace Eigen { + +namespace internal { + +/** \brief Maximum distance allowed between eigenvalues to be considered "close". */ +static const float matrix_function_separation = 0.1f; + +/** \ingroup MatrixFunctions_Module + * \class MatrixFunctionAtomic + * \brief Helper class for computing matrix functions of atomic matrices. + * + * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other. + */ +template +class MatrixFunctionAtomic +{ + public: + + typedef typename MatrixType::Scalar Scalar; + typedef typename stem_function::type StemFunction; + + /** \brief Constructor + * \param[in] f matrix function to compute. 
+ */ + MatrixFunctionAtomic(StemFunction f) : m_f(f) { } + + /** \brief Compute matrix function of atomic matrix + * \param[in] A argument of matrix function, should be upper triangular and atomic + * \returns f(A), the matrix function evaluated at the given matrix + */ + MatrixType compute(const MatrixType& A); + + private: + StemFunction* m_f; +}; + +template +typename NumTraits::Real matrix_function_compute_mu(const MatrixType& A) +{ + typedef typename plain_col_type::type VectorType; + Index rows = A.rows(); + const MatrixType N = MatrixType::Identity(rows, rows) - A; + VectorType e = VectorType::Ones(rows); + N.template triangularView().solveInPlace(e); + return e.cwiseAbs().maxCoeff(); +} + +template +MatrixType MatrixFunctionAtomic::compute(const MatrixType& A) +{ + // TODO: Use that A is upper triangular + typedef typename NumTraits::Real RealScalar; + Index rows = A.rows(); + Scalar avgEival = A.trace() / Scalar(RealScalar(rows)); + MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows); + RealScalar mu = matrix_function_compute_mu(Ashifted); + MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows); + MatrixType P = Ashifted; + MatrixType Fincr; + for (Index s = 1; double(s) < 1.1 * double(rows) + 10.0; s++) { // upper limit is fairly arbitrary + Fincr = m_f(avgEival, static_cast(s)) * P; + F += Fincr; + P = Scalar(RealScalar(1)/RealScalar(s + 1)) * P * Ashifted; + + // test whether Taylor series converged + const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff(); + const RealScalar Fincr_norm = Fincr.cwiseAbs().rowwise().sum().maxCoeff(); + if (Fincr_norm < NumTraits::epsilon() * F_norm) { + RealScalar delta = 0; + RealScalar rfactorial = 1; + for (Index r = 0; r < rows; r++) { + RealScalar mx = 0; + for (Index i = 0; i < rows; i++) + mx = (std::max)(mx, std::abs(m_f(Ashifted(i, i) + avgEival, static_cast(s+r)))); + if (r != 0) + rfactorial *= RealScalar(r); + delta = (std::max)(delta, mx / rfactorial); + } + const RealScalar P_norm = P.cwiseAbs().rowwise().sum().maxCoeff(); + if (mu * delta * P_norm < NumTraits::epsilon() * F_norm) // series converged + break; + } + } + return F; +} + +/** \brief Find cluster in \p clusters containing some value + * \param[in] key Value to find + * \returns Iterator to cluster containing \p key, or \c clusters.end() if no cluster in \p m_clusters + * contains \p key. + */ +template +typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOfClusters& clusters) +{ + typename std::list::iterator j; + for (typename ListOfClusters::iterator i = clusters.begin(); i != clusters.end(); ++i) { + j = std::find(i->begin(), i->end(), key); + if (j != i->end()) + return i; + } + return clusters.end(); +} + +/** \brief Partition eigenvalues in clusters of ei'vals close to each other + * + * \param[in] eivals Eigenvalues + * \param[out] clusters Resulting partition of eigenvalues + * + * The partition satisfies the following two properties: + * # Any eigenvalue in a certain cluster is at most matrix_function_separation() away from another eigenvalue + * in the same cluster. + * # The distance between two eigenvalues in different clusters is more than matrix_function_separation(). + * The implementation follows Algorithm 4.1 in the paper of Davies and Higham. 
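A worked instance of this partition with matrix_function_separation = 0.1: the eigenvalues {0.00, 0.07, 0.13, 1.00} produce the clusters {0.00, 0.07, 0.13} and {1.00}. 0.07 lies within 0.1 of 0.00 and 0.13 lies within 0.1 of 0.07, so all three are merged even though 0.13 - 0.00 exceeds 0.1, while 1.00 is more than 0.1 away from every member of the first cluster and stays on its own.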
+ */ +template +void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list& clusters) +{ + typedef typename EivalsType::RealScalar RealScalar; + for (Index i=0; i::iterator qi = matrix_function_find_cluster(i, clusters); + if (qi == clusters.end()) { + Cluster l; + l.push_back(i); + clusters.push_back(l); + qi = clusters.end(); + --qi; + } + + // Look for other element to add to the set + for (Index j=i+1; jbegin(), qi->end(), j) == qi->end()) { + typename std::list::iterator qj = matrix_function_find_cluster(j, clusters); + if (qj == clusters.end()) { + qi->push_back(j); + } else { + qi->insert(qi->end(), qj->begin(), qj->end()); + clusters.erase(qj); + } + } + } + } +} + +/** \brief Compute size of each cluster given a partitioning */ +template +void matrix_function_compute_cluster_size(const ListOfClusters& clusters, Matrix& clusterSize) +{ + const Index numClusters = static_cast(clusters.size()); + clusterSize.setZero(numClusters); + Index clusterIndex = 0; + for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) { + clusterSize[clusterIndex] = cluster->size(); + ++clusterIndex; + } +} + +/** \brief Compute start of each block using clusterSize */ +template +void matrix_function_compute_block_start(const VectorType& clusterSize, VectorType& blockStart) +{ + blockStart.resize(clusterSize.rows()); + blockStart(0) = 0; + for (Index i = 1; i < clusterSize.rows(); i++) { + blockStart(i) = blockStart(i-1) + clusterSize(i-1); + } +} + +/** \brief Compute mapping of eigenvalue indices to cluster indices */ +template +void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster) +{ + eivalToCluster.resize(eivals.rows()); + Index clusterIndex = 0; + for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) { + for (Index i = 0; i < eivals.rows(); ++i) { + if (std::find(cluster->begin(), cluster->end(), i) != cluster->end()) { + eivalToCluster[i] = clusterIndex; + } + } + ++clusterIndex; + } +} + +/** \brief Compute permutation which groups ei'vals in same cluster together */ +template +void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation) +{ + DynVectorType indexNextEntry = blockStart; + permutation.resize(eivalToCluster.rows()); + for (Index i = 0; i < eivalToCluster.rows(); i++) { + Index cluster = eivalToCluster[i]; + permutation[i] = indexNextEntry[cluster]; + ++indexNextEntry[cluster]; + } +} + +/** \brief Permute Schur decomposition in U and T according to permutation */ +template +void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T) +{ + for (Index i = 0; i < permutation.rows() - 1; i++) { + Index j; + for (j = i; j < permutation.rows(); j++) { + if (permutation(j) == i) break; + } + eigen_assert(permutation(j) == i); + for (Index k = j-1; k >= i; k--) { + JacobiRotation rotation; + rotation.makeGivens(T(k, k+1), T(k+1, k+1) - T(k, k)); + T.applyOnTheLeft(k, k+1, rotation.adjoint()); + T.applyOnTheRight(k, k+1, rotation); + U.applyOnTheRight(k, k+1, rotation); + std::swap(permutation.coeffRef(k), permutation.coeffRef(k+1)); + } + } +} + +/** \brief Compute block diagonal part of matrix function. + * + * This routine computes the matrix function applied to the block diagonal part of \p T (which should be + * upper triangular), with the blocking given by \p blockStart and \p clusterSize. 
The matrix function of + * each diagonal block is computed by \p atomic. The off-diagonal parts of \p fT are set to zero. + */ +template +void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT) +{ + fT.setZero(T.rows(), T.cols()); + for (Index i = 0; i < clusterSize.rows(); ++i) { + fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)) + = atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))); + } +} + +/** \brief Solve a triangular Sylvester equation AX + XB = C + * + * \param[in] A the matrix A; should be square and upper triangular + * \param[in] B the matrix B; should be square and upper triangular + * \param[in] C the matrix C; should have correct size. + * + * \returns the solution X. + * + * If A is m-by-m and B is n-by-n, then both C and X are m-by-n. The (i,j)-th component of the Sylvester + * equation is + * \f[ + * \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. + * \f] + * This can be re-arranged to yield: + * \f[ + * X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij} + * - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr). + * \f] + * It is assumed that A and B are such that the numerator is never zero (otherwise the Sylvester equation + * does not have a unique solution). In that case, these equations can be evaluated in the order + * \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$. + */ +template +MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C) +{ + eigen_assert(A.rows() == A.cols()); + eigen_assert(A.isUpperTriangular()); + eigen_assert(B.rows() == B.cols()); + eigen_assert(B.isUpperTriangular()); + eigen_assert(C.rows() == A.rows()); + eigen_assert(C.cols() == B.rows()); + + typedef typename MatrixType::Scalar Scalar; + + Index m = A.rows(); + Index n = B.rows(); + MatrixType X(m, n); + + for (Index i = m - 1; i >= 0; --i) { + for (Index j = 0; j < n; ++j) { + + // Compute AX = \sum_{k=i+1}^m A_{ik} X_{kj} + Scalar AX; + if (i == m - 1) { + AX = 0; + } else { + Matrix AXmatrix = A.row(i).tail(m-1-i) * X.col(j).tail(m-1-i); + AX = AXmatrix(0,0); + } + + // Compute XB = \sum_{k=1}^{j-1} X_{ik} B_{kj} + Scalar XB; + if (j == 0) { + XB = 0; + } else { + Matrix XBmatrix = X.row(i).head(j) * B.col(j).head(j); + XB = XBmatrix(0,0); + } + + X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j)); + } + } + return X; +} + +/** \brief Compute part of matrix function above block diagonal. + * + * This routine completes the computation of \p fT, denoting a matrix function applied to the triangular + * matrix \p T. It assumes that the block diagonal part of \p fT has already been computed. The part below + * the diagonal is zero, because \p T is upper triangular. 
+ */ +template +void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT) +{ + typedef internal::traits Traits; + typedef typename MatrixType::Scalar Scalar; + static const int Options = MatrixType::Options; + typedef Matrix DynMatrixType; + + for (Index k = 1; k < clusterSize.rows(); k++) { + for (Index i = 0; i < clusterSize.rows() - k; i++) { + // compute (i, i+k) block + DynMatrixType A = T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)); + DynMatrixType B = -T.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k)); + DynMatrixType C = fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)) + * T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)); + C -= T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)) + * fT.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k)); + for (Index m = i + 1; m < i + k; m++) { + C += fT.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m)) + * T.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k)); + C -= T.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m)) + * fT.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k)); + } + fT.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)) + = matrix_function_solve_triangular_sylvester(A, B, C); + } + } +} + +/** \ingroup MatrixFunctions_Module + * \brief Class for computing matrix functions. + * \tparam MatrixType type of the argument of the matrix function, + * expected to be an instantiation of the Matrix class template. + * \tparam AtomicType type for computing matrix function of atomic blocks. + * \tparam IsComplex used internally to select correct specialization. + * + * This class implements the Schur-Parlett algorithm for computing matrix functions. The spectrum of the + * matrix is divided in clustered of eigenvalues that lies close together. This class delegates the + * computation of the matrix function on every block corresponding to these clusters to an object of type + * \p AtomicType and uses these results to compute the matrix function of the whole matrix. The class + * \p AtomicType should have a \p compute() member function for computing the matrix function of a block. + * + * \sa class MatrixFunctionAtomic, class MatrixLogarithmAtomic + */ +template ::Scalar>::IsComplex> +struct matrix_function_compute +{ + /** \brief Compute the matrix function. + * + * \param[in] A argument of matrix function, should be a square matrix. + * \param[in] atomic class for computing matrix function of atomic blocks. + * \param[out] result the function \p f applied to \p A, as + * specified in the constructor. + * + * See MatrixBase::matrixFunction() for details on how this computation + * is implemented. + */ + template + static void run(const MatrixType& A, AtomicType& atomic, ResultType &result); +}; + +/** \internal \ingroup MatrixFunctions_Module + * \brief Partial specialization of MatrixFunction for real matrices + * + * This converts the real matrix to a complex matrix, compute the matrix function of that matrix, and then + * converts the result back to a real matrix. 
+ */ +template +struct matrix_function_compute +{ + template + static void run(const MatA& A, AtomicType& atomic, ResultType &result) + { + typedef internal::traits Traits; + typedef typename Traits::Scalar Scalar; + static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime; + static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime; + + typedef std::complex ComplexScalar; + typedef Matrix ComplexMatrix; + + ComplexMatrix CA = A.template cast(); + ComplexMatrix Cresult; + matrix_function_compute::run(CA, atomic, Cresult); + result = Cresult.real(); + } +}; + +/** \internal \ingroup MatrixFunctions_Module + * \brief Partial specialization of MatrixFunction for complex matrices + */ +template +struct matrix_function_compute +{ + template + static void run(const MatA& A, AtomicType& atomic, ResultType &result) + { + typedef internal::traits Traits; + + // compute Schur decomposition of A + const ComplexSchur schurOfA(A); + eigen_assert(schurOfA.info()==Success); + MatrixType T = schurOfA.matrixT(); + MatrixType U = schurOfA.matrixU(); + + // partition eigenvalues into clusters of ei'vals "close" to each other + std::list > clusters; + matrix_function_partition_eigenvalues(T.diagonal(), clusters); + + // compute size of each cluster + Matrix clusterSize; + matrix_function_compute_cluster_size(clusters, clusterSize); + + // blockStart[i] is row index at which block corresponding to i-th cluster starts + Matrix blockStart; + matrix_function_compute_block_start(clusterSize, blockStart); + + // compute map so that eivalToCluster[i] = j means that i-th ei'val is in j-th cluster + Matrix eivalToCluster; + matrix_function_compute_map(T.diagonal(), clusters, eivalToCluster); + + // compute permutation which groups ei'vals in same cluster together + Matrix permutation; + matrix_function_compute_permutation(blockStart, eivalToCluster, permutation); + + // permute Schur decomposition + matrix_function_permute_schur(permutation, U, T); + + // compute result + MatrixType fT; // matrix function applied to T + matrix_function_compute_block_atomic(T, atomic, blockStart, clusterSize, fT); + matrix_function_compute_above_diagonal(T, blockStart, clusterSize, fT); + result = U * (fT.template triangularView() * U.adjoint()); + } +}; + +} // end of namespace internal + +/** \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix function of some matrix (expression). + * + * \tparam Derived Type of the argument to the matrix function. + * + * This class holds the argument to the matrix function until it is assigned or evaluated for some other + * reason (so the argument should not be changed in the meantime). It is the return type of + * matrixBase::matrixFunction() and related functions and most of the time this is the only way it is used. + */ +template class MatrixFunctionReturnValue +: public ReturnByValue > +{ + public: + typedef typename Derived::Scalar Scalar; + typedef typename internal::stem_function::type StemFunction; + + protected: + typedef typename internal::ref_selector::type DerivedNested; + + public: + + /** \brief Constructor. + * + * \param[in] A %Matrix (expression) forming the argument of the matrix function. + * \param[in] f Stem function for matrix function under consideration. + */ + MatrixFunctionReturnValue(const Derived& A, StemFunction f) : m_A(A), m_f(f) { } + + /** \brief Compute the matrix function. + * + * \param[out] result \p f applied to \p A, where \p f and \p A are as in the constructor. 
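+ *
+ * A minimal usage sketch (illustrative only, assuming the module header
+ * <unsupported/Eigen/MatrixFunctions> is included): any function with the stem-function
+ * signature ComplexScalar(ComplexScalar, int) can be handed to MatrixBase::matrixFunction(),
+ * where the int argument is the derivative order requested for the atomic blocks.
+ * \code
+ * #include <unsupported/Eigen/MatrixFunctions>
+ * #include <complex>
+ * #include <iostream>
+ *
+ * // exp is its own derivative, so the derivative order can be ignored.
+ * std::complex<double> expfn(std::complex<double> x, int) { return std::exp(x); }
+ *
+ * int main()
+ * {
+ *   Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+ *   Eigen::Matrix3d expA = A.matrixFunction(expfn);  // the matrix exponential of A
+ *   std::cout << expA << std::endl;
+ * }
+ * \endcode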
+ */ + template + inline void evalTo(ResultType& result) const + { + typedef typename internal::nested_eval::type NestedEvalType; + typedef typename internal::remove_all::type NestedEvalTypeClean; + typedef internal::traits Traits; + typedef std::complex::Real> ComplexScalar; + typedef Matrix DynMatrixType; + + typedef internal::MatrixFunctionAtomic AtomicType; + AtomicType atomic(m_f); + + internal::matrix_function_compute::run(m_A, atomic, result); + } + + Index rows() const { return m_A.rows(); } + Index cols() const { return m_A.cols(); } + + private: + const DerivedNested m_A; + StemFunction *m_f; +}; + +namespace internal { +template +struct traits > +{ + typedef typename Derived::PlainObject ReturnType; +}; +} + + +/********** MatrixBase methods **********/ + + +template +const MatrixFunctionReturnValue MatrixBase::matrixFunction(typename internal::stem_function::Scalar>::type f) const +{ + eigen_assert(rows() == cols()); + return MatrixFunctionReturnValue(derived(), f); +} + +template +const MatrixFunctionReturnValue MatrixBase::sin() const +{ + eigen_assert(rows() == cols()); + typedef typename internal::stem_function::ComplexScalar ComplexScalar; + return MatrixFunctionReturnValue(derived(), internal::stem_function_sin); +} + +template +const MatrixFunctionReturnValue MatrixBase::cos() const +{ + eigen_assert(rows() == cols()); + typedef typename internal::stem_function::ComplexScalar ComplexScalar; + return MatrixFunctionReturnValue(derived(), internal::stem_function_cos); +} + +template +const MatrixFunctionReturnValue MatrixBase::sinh() const +{ + eigen_assert(rows() == cols()); + typedef typename internal::stem_function::ComplexScalar ComplexScalar; + return MatrixFunctionReturnValue(derived(), internal::stem_function_sinh); +} + +template +const MatrixFunctionReturnValue MatrixBase::cosh() const +{ + eigen_assert(rows() == cols()); + typedef typename internal::stem_function::ComplexScalar ComplexScalar; + return MatrixFunctionReturnValue(derived(), internal::stem_function_cosh); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_FUNCTION_H diff --git a/external/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/external/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h new file mode 100644 index 0000000..e917013 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h @@ -0,0 +1,373 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011, 2013 Jitse Niesen +// Copyright (C) 2011 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_LOGARITHM +#define EIGEN_MATRIX_LOGARITHM + +namespace Eigen { + +namespace internal { + +template +struct matrix_log_min_pade_degree +{ + static const int value = 3; +}; + +template +struct matrix_log_max_pade_degree +{ + typedef typename NumTraits::Real RealScalar; + static const int value = std::numeric_limits::digits<= 24? 5: // single precision + std::numeric_limits::digits<= 53? 7: // double precision + std::numeric_limits::digits<= 64? 8: // extended precision + std::numeric_limits::digits<=106? 10: // double-double + 11; // quadruple precision +}; + +/** \brief Compute logarithm of 2x2 triangular matrix. 
*/ +template +void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + using std::abs; + using std::ceil; + using std::imag; + using std::log; + + Scalar logA00 = log(A(0,0)); + Scalar logA11 = log(A(1,1)); + + result(0,0) = logA00; + result(1,0) = Scalar(0); + result(1,1) = logA11; + + Scalar y = A(1,1) - A(0,0); + if (y==Scalar(0)) + { + result(0,1) = A(0,1) / A(0,0); + } + else if ((abs(A(0,0)) < RealScalar(0.5)*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1)))) + { + result(0,1) = A(0,1) * (logA11 - logA00) / y; + } + else + { + // computation in previous branch is inaccurate if A(1,1) \approx A(0,0) + RealScalar unwindingNumber = ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)); + result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,RealScalar(2*EIGEN_PI)*unwindingNumber)) / y; + } +} + +/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = float) */ +inline int matrix_log_get_pade_degree(float normTminusI) +{ + const float maxNormForPade[] = { 2.5111573934555054e-1 /* degree = 3 */ , 4.0535837411880493e-1, + 5.3149729967117310e-1 }; + const int minPadeDegree = matrix_log_min_pade_degree::value; + const int maxPadeDegree = matrix_log_max_pade_degree::value; + int degree = minPadeDegree; + for (; degree <= maxPadeDegree; ++degree) + if (normTminusI <= maxNormForPade[degree - minPadeDegree]) + break; + return degree; +} + +/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = double) */ +inline int matrix_log_get_pade_degree(double normTminusI) +{ + const double maxNormForPade[] = { 1.6206284795015624e-2 /* degree = 3 */ , 5.3873532631381171e-2, + 1.1352802267628681e-1, 1.8662860613541288e-1, 2.642960831111435e-1 }; + const int minPadeDegree = matrix_log_min_pade_degree::value; + const int maxPadeDegree = matrix_log_max_pade_degree::value; + int degree = minPadeDegree; + for (; degree <= maxPadeDegree; ++degree) + if (normTminusI <= maxNormForPade[degree - minPadeDegree]) + break; + return degree; +} + +/* \brief Get suitable degree for Pade approximation. 
(specialized for RealScalar = long double) */ +inline int matrix_log_get_pade_degree(long double normTminusI) +{ +#if LDBL_MANT_DIG == 53 // double precision + const long double maxNormForPade[] = { 1.6206284795015624e-2L /* degree = 3 */ , 5.3873532631381171e-2L, + 1.1352802267628681e-1L, 1.8662860613541288e-1L, 2.642960831111435e-1L }; +#elif LDBL_MANT_DIG <= 64 // extended precision + const long double maxNormForPade[] = { 5.48256690357782863103e-3L /* degree = 3 */, 2.34559162387971167321e-2L, + 5.84603923897347449857e-2L, 1.08486423756725170223e-1L, 1.68385767881294446649e-1L, + 2.32777776523703892094e-1L }; +#elif LDBL_MANT_DIG <= 106 // double-double + const long double maxNormForPade[] = { 8.58970550342939562202529664318890e-5L /* degree = 3 */, + 9.34074328446359654039446552677759e-4L, 4.26117194647672175773064114582860e-3L, + 1.21546224740281848743149666560464e-2L, 2.61100544998339436713088248557444e-2L, + 4.66170074627052749243018566390567e-2L, 7.32585144444135027565872014932387e-2L, + 1.05026503471351080481093652651105e-1L }; +#else // quadruple precision + const long double maxNormForPade[] = { 4.7419931187193005048501568167858103e-5L /* degree = 3 */, + 5.8853168473544560470387769480192666e-4L, 2.9216120366601315391789493628113520e-3L, + 8.8415758124319434347116734705174308e-3L, 1.9850836029449446668518049562565291e-2L, + 3.6688019729653446926585242192447447e-2L, 5.9290962294020186998954055264528393e-2L, + 8.6998436081634343903250580992127677e-2L, 1.1880960220216759245467951592883642e-1L }; +#endif + const int minPadeDegree = matrix_log_min_pade_degree::value; + const int maxPadeDegree = matrix_log_max_pade_degree::value; + int degree = minPadeDegree; + for (; degree <= maxPadeDegree; ++degree) + if (normTminusI <= maxNormForPade[degree - minPadeDegree]) + break; + return degree; +} + +/* \brief Compute Pade approximation to matrix logarithm */ +template +void matrix_log_compute_pade(MatrixType& result, const MatrixType& T, int degree) +{ + typedef typename NumTraits::Real RealScalar; + const int minPadeDegree = 3; + const int maxPadeDegree = 11; + assert(degree >= minPadeDegree && degree <= maxPadeDegree); + // FIXME this creates float-conversion-warnings if these are enabled. 
+ // Either manually convert each value, or disable the warning locally + const RealScalar nodes[][maxPadeDegree] = { + { 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L, // degree 3 + 0.8872983346207416885179265399782400L }, + { 0.0694318442029737123880267555535953L, 0.3300094782075718675986671204483777L, // degree 4 + 0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L }, + { 0.0469100770306680036011865608503035L, 0.2307653449471584544818427896498956L, // degree 5 + 0.5000000000000000000000000000000000L, 0.7692346550528415455181572103501044L, + 0.9530899229693319963988134391496965L }, + { 0.0337652428984239860938492227530027L, 0.1693953067668677431693002024900473L, // degree 6 + 0.3806904069584015456847491391596440L, 0.6193095930415984543152508608403560L, + 0.8306046932331322568306997975099527L, 0.9662347571015760139061507772469973L }, + { 0.0254460438286207377369051579760744L, 0.1292344072003027800680676133596058L, // degree 7 + 0.2970774243113014165466967939615193L, 0.5000000000000000000000000000000000L, + 0.7029225756886985834533032060384807L, 0.8707655927996972199319323866403942L, + 0.9745539561713792622630948420239256L }, + { 0.0198550717512318841582195657152635L, 0.1016667612931866302042230317620848L, // degree 8 + 0.2372337950418355070911304754053768L, 0.4082826787521750975302619288199080L, + 0.5917173212478249024697380711800920L, 0.7627662049581644929088695245946232L, + 0.8983332387068133697957769682379152L, 0.9801449282487681158417804342847365L }, + { 0.0159198802461869550822118985481636L, 0.0819844463366821028502851059651326L, // degree 9 + 0.1933142836497048013456489803292629L, 0.3378732882980955354807309926783317L, + 0.5000000000000000000000000000000000L, 0.6621267117019044645192690073216683L, + 0.8066857163502951986543510196707371L, 0.9180155536633178971497148940348674L, + 0.9840801197538130449177881014518364L }, + { 0.0130467357414141399610179939577740L, 0.0674683166555077446339516557882535L, // degree 10 + 0.1602952158504877968828363174425632L, 0.2833023029353764046003670284171079L, + 0.4255628305091843945575869994351400L, 0.5744371694908156054424130005648600L, + 0.7166976970646235953996329715828921L, 0.8397047841495122031171636825574368L, + 0.9325316833444922553660483442117465L, 0.9869532642585858600389820060422260L }, + { 0.0108856709269715035980309994385713L, 0.0564687001159523504624211153480364L, // degree 11 + 0.1349239972129753379532918739844233L, 0.2404519353965940920371371652706952L, + 0.3652284220238275138342340072995692L, 0.5000000000000000000000000000000000L, + 0.6347715779761724861657659927004308L, 0.7595480646034059079628628347293048L, + 0.8650760027870246620467081260155767L, 0.9435312998840476495375788846519636L, + 0.9891143290730284964019690005614287L } }; + + const RealScalar weights[][maxPadeDegree] = { + { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L, // degree 3 + 0.2777777777777777777777777777777778L }, + { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L, // degree 4 + 0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L }, + { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L, // degree 5 + 0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L, + 0.1184634425280945437571320203599587L }, + { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L, // degree 6 + 0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L, + 
0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L }, + { 0.0647424830844348466353057163395410L, 0.1398526957446383339507338857118898L, // degree 7 + 0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L, + 0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L, + 0.0647424830844348466353057163395410L }, + { 0.0506142681451881295762656771549811L, 0.1111905172266872352721779972131204L, // degree 8 + 0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L, + 0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L, + 0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L }, + { 0.0406371941807872059859460790552618L, 0.0903240803474287020292360156214564L, // degree 9 + 0.1303053482014677311593714347093164L, 0.1561735385200014200343152032922218L, + 0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L, + 0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L, + 0.0406371941807872059859460790552618L }, + { 0.0333356721543440687967844049466659L, 0.0747256745752902965728881698288487L, // degree 10 + 0.1095431812579910219977674671140816L, 0.1346333596549981775456134607847347L, + 0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L, + 0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L, + 0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L }, + { 0.0278342835580868332413768602212743L, 0.0627901847324523123173471496119701L, // degree 11 + 0.0931451054638671257130488207158280L, 0.1165968822959952399592618524215876L, + 0.1314022722551233310903444349452546L, 0.1364625433889503153572417641681711L, + 0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L, + 0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L, + 0.0278342835580868332413768602212743L } }; + + MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows()); + result.setZero(T.rows(), T.rows()); + for (int k = 0; k < degree; ++k) { + RealScalar weight = weights[degree-minPadeDegree][k]; + RealScalar node = nodes[degree-minPadeDegree][k]; + result += weight * (MatrixType::Identity(T.rows(), T.rows()) + node * TminusI) + .template triangularView().solve(TminusI); + } +} + +/** \brief Compute logarithm of triangular matrices with size > 2. + * \details This uses a inverse scale-and-square algorithm. */ +template +void matrix_log_compute_big(const MatrixType& A, MatrixType& result) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + using std::pow; + + int numberOfSquareRoots = 0; + int numberOfExtraSquareRoots = 0; + int degree; + MatrixType T = A, sqrtT; + + const int maxPadeDegree = matrix_log_max_pade_degree::value; + const RealScalar maxNormForPade = RealScalar( + maxPadeDegree<= 5? 5.3149729967117310e-1L: // single precision + maxPadeDegree<= 7? 2.6429608311114350e-1L: // double precision + maxPadeDegree<= 8? 2.32777776523703892094e-1L: // extended precision + maxPadeDegree<=10? 
1.05026503471351080481093652651105e-1L: // double-double + 1.1880960220216759245467951592883642e-1L); // quadruple precision + + while (true) { + RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff(); + if (normTminusI < maxNormForPade) { + degree = matrix_log_get_pade_degree(normTminusI); + int degree2 = matrix_log_get_pade_degree(normTminusI / RealScalar(2)); + if ((degree - degree2 <= 1) || (numberOfExtraSquareRoots == 1)) + break; + ++numberOfExtraSquareRoots; + } + matrix_sqrt_triangular(T, sqrtT); + T = sqrtT.template triangularView(); + ++numberOfSquareRoots; + } + + matrix_log_compute_pade(result, T, degree); + result *= pow(RealScalar(2), RealScalar(numberOfSquareRoots)); // TODO replace by bitshift if possible +} + +/** \ingroup MatrixFunctions_Module + * \class MatrixLogarithmAtomic + * \brief Helper class for computing matrix logarithm of atomic matrices. + * + * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other. + * + * \sa class MatrixFunctionAtomic, MatrixBase::log() + */ +template +class MatrixLogarithmAtomic +{ +public: + /** \brief Compute matrix logarithm of atomic matrix + * \param[in] A argument of matrix logarithm, should be upper triangular and atomic + * \returns The logarithm of \p A. + */ + MatrixType compute(const MatrixType& A); +}; + +template +MatrixType MatrixLogarithmAtomic::compute(const MatrixType& A) +{ + using std::log; + MatrixType result(A.rows(), A.rows()); + if (A.rows() == 1) + result(0,0) = log(A(0,0)); + else if (A.rows() == 2) + matrix_log_compute_2x2(A, result); + else + matrix_log_compute_big(A, result); + return result; +} + +} // end of namespace internal + +/** \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix logarithm of some matrix (expression). + * + * \tparam Derived Type of the argument to the matrix function. + * + * This class holds the argument to the matrix function until it is + * assigned or evaluated for some other reason (so the argument + * should not be changed in the meantime). It is the return type of + * MatrixBase::log() and most of the time this is the only way it + * is used. + */ +template class MatrixLogarithmReturnValue +: public ReturnByValue > +{ +public: + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Index Index; + +protected: + typedef typename internal::ref_selector::type DerivedNested; + +public: + + /** \brief Constructor. + * + * \param[in] A %Matrix (expression) forming the argument of the matrix logarithm. + */ + explicit MatrixLogarithmReturnValue(const Derived& A) : m_A(A) { } + + /** \brief Compute the matrix logarithm. + * + * \param[out] result Logarithm of \c A, where \c A is as specified in the constructor. 
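+ *
+ * A minimal usage sketch (illustrative only; it assumes <unsupported/Eigen/MatrixFunctions> is
+ * included and that the eigenvalues of the argument are real and positive, so that the logarithm
+ * is a real matrix):
+ * \code
+ * #include <unsupported/Eigen/MatrixFunctions>
+ * #include <iostream>
+ *
+ * int main()
+ * {
+ *   Eigen::Matrix3d A;
+ *   A << 1, 0, 0,
+ *        0, 2, 0,
+ *        0, 0, 3;                     // diagonal with positive entries
+ *   Eigen::Matrix3d L = A.log();      // matrix logarithm, computed by this module
+ *   Eigen::Matrix3d B = L.exp();      // exp(log(A)) recovers A
+ *   std::cout << (B - A).norm() << std::endl;  // should be ~0
+ * }
+ * \endcode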
+ */ + template + inline void evalTo(ResultType& result) const + { + typedef typename internal::nested_eval::type DerivedEvalType; + typedef typename internal::remove_all::type DerivedEvalTypeClean; + typedef internal::traits Traits; + typedef std::complex::Real> ComplexScalar; + typedef Matrix DynMatrixType; + typedef internal::MatrixLogarithmAtomic AtomicType; + AtomicType atomic; + + internal::matrix_function_compute::run(m_A, atomic, result); + } + + Index rows() const { return m_A.rows(); } + Index cols() const { return m_A.cols(); } + +private: + const DerivedNested m_A; +}; + +namespace internal { + template + struct traits > + { + typedef typename Derived::PlainObject ReturnType; + }; +} + + +/********** MatrixBase method **********/ + + +template +const MatrixLogarithmReturnValue MatrixBase::log() const +{ + eigen_assert(rows() == cols()); + return MatrixLogarithmReturnValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_LOGARITHM diff --git a/external/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/external/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h new file mode 100644 index 0000000..d7672d7 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h @@ -0,0 +1,705 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012, 2013 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_POWER +#define EIGEN_MATRIX_POWER + +namespace Eigen { + +template class MatrixPower; + +/** + * \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix power of some matrix. + * + * \tparam MatrixType type of the base, a matrix. + * + * This class holds the arguments to the matrix power until it is + * assigned or evaluated for some other reason (so the argument + * should not be changed in the meantime). It is the return type of + * MatrixPower::operator() and related functions and most of the + * time this is the only way it is used. + */ +/* TODO This class is only used by MatrixPower, so it should be nested + * into MatrixPower, like MatrixPower::ReturnValue. However, my + * compiler complained about unused template parameter in the + * following declaration in namespace internal. + * + * template + * struct traits::ReturnValue>; + */ +template +class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParenthesesReturnValue > +{ + public: + typedef typename MatrixType::RealScalar RealScalar; + + /** + * \brief Constructor. + * + * \param[in] pow %MatrixPower storing the base. + * \param[in] p scalar, the exponent of the matrix power. + */ + MatrixPowerParenthesesReturnValue(MatrixPower& pow, RealScalar p) : m_pow(pow), m_p(p) + { } + + /** + * \brief Compute the matrix power. + * + * \param[out] result + */ + template + inline void evalTo(ResultType& result) const + { m_pow.compute(result, m_p); } + + Index rows() const { return m_pow.rows(); } + Index cols() const { return m_pow.cols(); } + + private: + MatrixPower& m_pow; + const RealScalar m_p; +}; + +/** + * \ingroup MatrixFunctions_Module + * + * \brief Class for computing matrix powers. + * + * \tparam MatrixType type of the base, expected to be an instantiation + * of the Matrix class template. 
+ * + * This class is capable of computing triangular real/complex matrices + * raised to a power in the interval \f$ (-1, 1) \f$. + * + * \note Currently this class is only used by MatrixPower. One may + * insist that this be nested into MatrixPower. This class is here to + * facilitate future development of triangular matrix functions. + */ +template +class MatrixPowerAtomic : internal::noncopyable +{ + private: + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef std::complex ComplexScalar; + typedef Block ResultType; + + const MatrixType& m_A; + RealScalar m_p; + + void computePade(int degree, const MatrixType& IminusT, ResultType& res) const; + void compute2x2(ResultType& res, RealScalar p) const; + void computeBig(ResultType& res) const; + static int getPadeDegree(float normIminusT); + static int getPadeDegree(double normIminusT); + static int getPadeDegree(long double normIminusT); + static ComplexScalar computeSuperDiag(const ComplexScalar&, const ComplexScalar&, RealScalar p); + static RealScalar computeSuperDiag(RealScalar, RealScalar, RealScalar p); + + public: + /** + * \brief Constructor. + * + * \param[in] T the base of the matrix power. + * \param[in] p the exponent of the matrix power, should be in + * \f$ (-1, 1) \f$. + * + * The class stores a reference to T, so it should not be changed + * (or destroyed) before evaluation. Only the upper triangular + * part of T is read. + */ + MatrixPowerAtomic(const MatrixType& T, RealScalar p); + + /** + * \brief Compute the matrix power. + * + * \param[out] res \f$ A^p \f$ where A and p are specified in the + * constructor. + */ + void compute(ResultType& res) const; +}; + +template +MatrixPowerAtomic::MatrixPowerAtomic(const MatrixType& T, RealScalar p) : + m_A(T), m_p(p) +{ + eigen_assert(T.rows() == T.cols()); + eigen_assert(p > -1 && p < 1); +} + +template +void MatrixPowerAtomic::compute(ResultType& res) const +{ + using std::pow; + switch (m_A.rows()) { + case 0: + break; + case 1: + res(0,0) = pow(m_A(0,0), m_p); + break; + case 2: + compute2x2(res, m_p); + break; + default: + computeBig(res); + } +} + +template +void MatrixPowerAtomic::computePade(int degree, const MatrixType& IminusT, ResultType& res) const +{ + int i = 2*degree; + res = (m_p-RealScalar(degree)) / RealScalar(2*i-2) * IminusT; + + for (--i; i; --i) { + res = (MatrixType::Identity(IminusT.rows(), IminusT.cols()) + res).template triangularView() + .solve((i==1 ? -m_p : i&1 ? 
(-m_p-RealScalar(i/2))/RealScalar(2*i) : (m_p-RealScalar(i/2))/RealScalar(2*i-2)) * IminusT).eval(); + } + res += MatrixType::Identity(IminusT.rows(), IminusT.cols()); +} + +// This function assumes that res has the correct size (see bug 614) +template +void MatrixPowerAtomic::compute2x2(ResultType& res, RealScalar p) const +{ + using std::abs; + using std::pow; + res.coeffRef(0,0) = pow(m_A.coeff(0,0), p); + + for (Index i=1; i < m_A.cols(); ++i) { + res.coeffRef(i,i) = pow(m_A.coeff(i,i), p); + if (m_A.coeff(i-1,i-1) == m_A.coeff(i,i)) + res.coeffRef(i-1,i) = p * pow(m_A.coeff(i,i), p-1); + else if (2*abs(m_A.coeff(i-1,i-1)) < abs(m_A.coeff(i,i)) || 2*abs(m_A.coeff(i,i)) < abs(m_A.coeff(i-1,i-1))) + res.coeffRef(i-1,i) = (res.coeff(i,i)-res.coeff(i-1,i-1)) / (m_A.coeff(i,i)-m_A.coeff(i-1,i-1)); + else + res.coeffRef(i-1,i) = computeSuperDiag(m_A.coeff(i,i), m_A.coeff(i-1,i-1), p); + res.coeffRef(i-1,i) *= m_A.coeff(i-1,i); + } +} + +template +void MatrixPowerAtomic::computeBig(ResultType& res) const +{ + using std::ldexp; + const int digits = std::numeric_limits::digits; + const RealScalar maxNormForPade = RealScalar( + digits <= 24? 4.3386528e-1L // single precision + : digits <= 53? 2.789358995219730e-1L // double precision + : digits <= 64? 2.4471944416607995472e-1L // extended precision + : digits <= 106? 1.1016843812851143391275867258512e-1L // double-double + : 9.134603732914548552537150753385375e-2L); // quadruple precision + MatrixType IminusT, sqrtT, T = m_A.template triangularView(); + RealScalar normIminusT; + int degree, degree2, numberOfSquareRoots = 0; + bool hasExtraSquareRoot = false; + + for (Index i=0; i < m_A.cols(); ++i) + eigen_assert(m_A(i,i) != RealScalar(0)); + + while (true) { + IminusT = MatrixType::Identity(m_A.rows(), m_A.cols()) - T; + normIminusT = IminusT.cwiseAbs().colwise().sum().maxCoeff(); + if (normIminusT < maxNormForPade) { + degree = getPadeDegree(normIminusT); + degree2 = getPadeDegree(normIminusT/2); + if (degree - degree2 <= 1 || hasExtraSquareRoot) + break; + hasExtraSquareRoot = true; + } + matrix_sqrt_triangular(T, sqrtT); + T = sqrtT.template triangularView(); + ++numberOfSquareRoots; + } + computePade(degree, IminusT, res); + + for (; numberOfSquareRoots; --numberOfSquareRoots) { + compute2x2(res, ldexp(m_p, -numberOfSquareRoots)); + res = res.template triangularView() * res; + } + compute2x2(res, m_p); +} + +template +inline int MatrixPowerAtomic::getPadeDegree(float normIminusT) +{ + const float maxNormForPade[] = { 2.8064004e-1f /* degree = 3 */ , 4.3386528e-1f }; + int degree = 3; + for (; degree <= 4; ++degree) + if (normIminusT <= maxNormForPade[degree - 3]) + break; + return degree; +} + +template +inline int MatrixPowerAtomic::getPadeDegree(double normIminusT) +{ + const double maxNormForPade[] = { 1.884160592658218e-2 /* degree = 3 */ , 6.038881904059573e-2, 1.239917516308172e-1, + 1.999045567181744e-1, 2.789358995219730e-1 }; + int degree = 3; + for (; degree <= 7; ++degree) + if (normIminusT <= maxNormForPade[degree - 3]) + break; + return degree; +} + +template +inline int MatrixPowerAtomic::getPadeDegree(long double normIminusT) +{ +#if LDBL_MANT_DIG == 53 + const int maxPadeDegree = 7; + const double maxNormForPade[] = { 1.884160592658218e-2L /* degree = 3 */ , 6.038881904059573e-2L, 1.239917516308172e-1L, + 1.999045567181744e-1L, 2.789358995219730e-1L }; +#elif LDBL_MANT_DIG <= 64 + const int maxPadeDegree = 8; + const long double maxNormForPade[] = { 6.3854693117491799460e-3L /* degree = 3 */ , 2.6394893435456973676e-2L, + 
6.4216043030404063729e-2L, 1.1701165502926694307e-1L, 1.7904284231268670284e-1L, 2.4471944416607995472e-1L }; +#elif LDBL_MANT_DIG <= 106 + const int maxPadeDegree = 10; + const double maxNormForPade[] = { 1.0007161601787493236741409687186e-4L /* degree = 3 */ , + 1.0007161601787493236741409687186e-3L, 4.7069769360887572939882574746264e-3L, 1.3220386624169159689406653101695e-2L, + 2.8063482381631737920612944054906e-2L, 4.9625993951953473052385361085058e-2L, 7.7367040706027886224557538328171e-2L, + 1.1016843812851143391275867258512e-1L }; +#else + const int maxPadeDegree = 10; + const double maxNormForPade[] = { 5.524506147036624377378713555116378e-5L /* degree = 3 */ , + 6.640600568157479679823602193345995e-4L, 3.227716520106894279249709728084626e-3L, + 9.619593944683432960546978734646284e-3L, 2.134595382433742403911124458161147e-2L, + 3.908166513900489428442993794761185e-2L, 6.266780814639442865832535460550138e-2L, + 9.134603732914548552537150753385375e-2L }; +#endif + int degree = 3; + for (; degree <= maxPadeDegree; ++degree) + if (normIminusT <= maxNormForPade[degree - 3]) + break; + return degree; +} + +template +inline typename MatrixPowerAtomic::ComplexScalar +MatrixPowerAtomic::computeSuperDiag(const ComplexScalar& curr, const ComplexScalar& prev, RealScalar p) +{ + using std::ceil; + using std::exp; + using std::log; + using std::sinh; + + ComplexScalar logCurr = log(curr); + ComplexScalar logPrev = log(prev); + RealScalar unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)); + ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, RealScalar(EIGEN_PI)*unwindingNumber); + return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev); +} + +template +inline typename MatrixPowerAtomic::RealScalar +MatrixPowerAtomic::computeSuperDiag(RealScalar curr, RealScalar prev, RealScalar p) +{ + using std::exp; + using std::log; + using std::sinh; + + RealScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2); + return 2 * exp(p * (log(curr) + log(prev)) / 2) * sinh(p * w) / (curr - prev); +} + +/** + * \ingroup MatrixFunctions_Module + * + * \brief Class for computing matrix powers. + * + * \tparam MatrixType type of the base, expected to be an instantiation + * of the Matrix class template. + * + * This class is capable of computing real/complex matrices raised to + * an arbitrary real power. Meanwhile, it saves the result of Schur + * decomposition if an non-integral power has even been calculated. + * Therefore, if you want to compute multiple (>= 2) matrix powers + * for the same matrix, using the class directly is more efficient than + * calling MatrixBase::pow(). + * + * Example: + * \include MatrixPower_optimal.cpp + * Output: \verbinclude MatrixPower_optimal.out + */ +template +class MatrixPower : internal::noncopyable +{ + private: + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + public: + /** + * \brief Constructor. + * + * \param[in] A the base of the matrix power. + * + * The class stores a reference to A, so it should not be changed + * (or destroyed) before evaluation. + */ + explicit MatrixPower(const MatrixType& A) : + m_A(A), + m_conditionNumber(0), + m_rank(A.cols()), + m_nulls(0) + { eigen_assert(A.rows() == A.cols()); } + + /** + * \brief Returns the matrix power. + * + * \param[in] p exponent, a real scalar. + * \return The expression \f$ A^p \f$, where A is specified in the + * constructor. 
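+ *
+ * A minimal usage sketch (illustrative only, assuming <unsupported/Eigen/MatrixFunctions> is
+ * included; the base is made symmetric positive definite so that real fractional powers are
+ * well defined):
+ * \code
+ * #include <unsupported/Eigen/MatrixFunctions>
+ * #include <iostream>
+ *
+ * int main()
+ * {
+ *   Eigen::Matrix4d X = Eigen::Matrix4d::Random();
+ *   Eigen::Matrix4d A = X * X.transpose() + Eigen::Matrix4d::Identity();
+ *
+ *   Eigen::MatrixPower<Eigen::Matrix4d> Apow(A);  // Schur decomposition is computed lazily, once
+ *   Eigen::Matrix4d B = Apow(0.5);                // A^(1/2)
+ *   Eigen::Matrix4d C = Apow(1.5);                // A^(3/2), reuses the cached decomposition
+ *
+ *   std::cout << (B * B - A).norm() << "\n"       // both residuals should be ~0
+ *             << (C - A * B).norm() << std::endl;
+ * }
+ * \endcode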
+ */ + const MatrixPowerParenthesesReturnValue operator()(RealScalar p) + { return MatrixPowerParenthesesReturnValue(*this, p); } + + /** + * \brief Compute the matrix power. + * + * \param[in] p exponent, a real scalar. + * \param[out] res \f$ A^p \f$ where A is specified in the + * constructor. + */ + template + void compute(ResultType& res, RealScalar p); + + Index rows() const { return m_A.rows(); } + Index cols() const { return m_A.cols(); } + + private: + typedef std::complex ComplexScalar; + typedef Matrix ComplexMatrix; + + /** \brief Reference to the base of matrix power. */ + typename MatrixType::Nested m_A; + + /** \brief Temporary storage. */ + MatrixType m_tmp; + + /** \brief Store the result of Schur decomposition. */ + ComplexMatrix m_T, m_U; + + /** \brief Store fractional power of m_T. */ + ComplexMatrix m_fT; + + /** + * \brief Condition number of m_A. + * + * It is initialized as 0 to avoid performing unnecessary Schur + * decomposition, which is the bottleneck. + */ + RealScalar m_conditionNumber; + + /** \brief Rank of m_A. */ + Index m_rank; + + /** \brief Rank deficiency of m_A. */ + Index m_nulls; + + /** + * \brief Split p into integral part and fractional part. + * + * \param[in] p The exponent. + * \param[out] p The fractional part ranging in \f$ (-1, 1) \f$. + * \param[out] intpart The integral part. + * + * Only if the fractional part is nonzero, it calls initialize(). + */ + void split(RealScalar& p, RealScalar& intpart); + + /** \brief Perform Schur decomposition for fractional power. */ + void initialize(); + + template + void computeIntPower(ResultType& res, RealScalar p); + + template + void computeFracPower(ResultType& res, RealScalar p); + + template + static void revertSchur( + Matrix& res, + const ComplexMatrix& T, + const ComplexMatrix& U); + + template + static void revertSchur( + Matrix& res, + const ComplexMatrix& T, + const ComplexMatrix& U); +}; + +template +template +void MatrixPower::compute(ResultType& res, RealScalar p) +{ + using std::pow; + switch (cols()) { + case 0: + break; + case 1: + res(0,0) = pow(m_A.coeff(0,0), p); + break; + default: + RealScalar intpart; + split(p, intpart); + + res = MatrixType::Identity(rows(), cols()); + computeIntPower(res, intpart); + if (p) computeFracPower(res, p); + } +} + +template +void MatrixPower::split(RealScalar& p, RealScalar& intpart) +{ + using std::floor; + using std::pow; + + intpart = floor(p); + p -= intpart; + + // Perform Schur decomposition if it is not yet performed and the power is + // not an integer. + if (!m_conditionNumber && p) + initialize(); + + // Choose the more stable of intpart = floor(p) and intpart = ceil(p). + if (p > RealScalar(0.5) && p > (1-p) * pow(m_conditionNumber, p)) { + --p; + ++intpart; + } +} + +template +void MatrixPower::initialize() +{ + const ComplexSchur schurOfA(m_A); + JacobiRotation rot; + ComplexScalar eigenvalue; + + m_fT.resizeLike(m_A); + m_T = schurOfA.matrixT(); + m_U = schurOfA.matrixU(); + m_conditionNumber = m_T.diagonal().array().abs().maxCoeff() / m_T.diagonal().array().abs().minCoeff(); + + // Move zero eigenvalues to the bottom right corner. 
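+  // A zero found at T(i,i) is swapped down to position (m_rank-1, m_rank-1) by successive Givens
+  // rotations applied to both sides of m_T (with m_U updated on the right), so T stays upper
+  // triangular, A = U T U^* is preserved, and m_rank is decreased once per zero eigenvalue.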
+ for (Index i = cols()-1; i>=0; --i) { + if (m_rank <= 2) + return; + if (m_T.coeff(i,i) == RealScalar(0)) { + for (Index j=i+1; j < m_rank; ++j) { + eigenvalue = m_T.coeff(j,j); + rot.makeGivens(m_T.coeff(j-1,j), eigenvalue); + m_T.applyOnTheRight(j-1, j, rot); + m_T.applyOnTheLeft(j-1, j, rot.adjoint()); + m_T.coeffRef(j-1,j-1) = eigenvalue; + m_T.coeffRef(j,j) = RealScalar(0); + m_U.applyOnTheRight(j-1, j, rot); + } + --m_rank; + } + } + + m_nulls = rows() - m_rank; + if (m_nulls) { + eigen_assert(m_T.bottomRightCorner(m_nulls, m_nulls).isZero() + && "Base of matrix power should be invertible or with a semisimple zero eigenvalue."); + m_fT.bottomRows(m_nulls).fill(RealScalar(0)); + } +} + +template +template +void MatrixPower::computeIntPower(ResultType& res, RealScalar p) +{ + using std::abs; + using std::fmod; + RealScalar pp = abs(p); + + if (p<0) + m_tmp = m_A.inverse(); + else + m_tmp = m_A; + + while (true) { + if (fmod(pp, 2) >= 1) + res = m_tmp * res; + pp /= 2; + if (pp < 1) + break; + m_tmp *= m_tmp; + } +} + +template +template +void MatrixPower::computeFracPower(ResultType& res, RealScalar p) +{ + Block blockTp(m_fT, 0, 0, m_rank, m_rank); + eigen_assert(m_conditionNumber); + eigen_assert(m_rank + m_nulls == rows()); + + MatrixPowerAtomic(m_T.topLeftCorner(m_rank, m_rank), p).compute(blockTp); + if (m_nulls) { + m_fT.topRightCorner(m_rank, m_nulls) = m_T.topLeftCorner(m_rank, m_rank).template triangularView() + .solve(blockTp * m_T.topRightCorner(m_rank, m_nulls)); + } + revertSchur(m_tmp, m_fT, m_U); + res = m_tmp * res; +} + +template +template +inline void MatrixPower::revertSchur( + Matrix& res, + const ComplexMatrix& T, + const ComplexMatrix& U) +{ res.noalias() = U * (T.template triangularView() * U.adjoint()); } + +template +template +inline void MatrixPower::revertSchur( + Matrix& res, + const ComplexMatrix& T, + const ComplexMatrix& U) +{ res.noalias() = (U * (T.template triangularView() * U.adjoint())).real(); } + +/** + * \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix power of some matrix (expression). + * + * \tparam Derived type of the base, a matrix (expression). + * + * This class holds the arguments to the matrix power until it is + * assigned or evaluated for some other reason (so the argument + * should not be changed in the meantime). It is the return type of + * MatrixBase::pow() and related functions and most of the + * time this is the only way it is used. + */ +template +class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue > +{ + public: + typedef typename Derived::PlainObject PlainObject; + typedef typename Derived::RealScalar RealScalar; + + /** + * \brief Constructor. + * + * \param[in] A %Matrix (expression), the base of the matrix power. + * \param[in] p real scalar, the exponent of the matrix power. + */ + MatrixPowerReturnValue(const Derived& A, RealScalar p) : m_A(A), m_p(p) + { } + + /** + * \brief Compute the matrix power. + * + * \param[out] result \f$ A^p \f$ where \p A and \p p are as in the + * constructor. + */ + template + inline void evalTo(ResultType& result) const + { MatrixPower(m_A.eval()).compute(result, m_p); } + + Index rows() const { return m_A.rows(); } + Index cols() const { return m_A.cols(); } + + private: + const Derived& m_A; + const RealScalar m_p; +}; + +/** + * \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix power of some matrix (expression). + * + * \tparam Derived type of the base, a matrix (expression). 
+ * + * This class holds the arguments to the matrix power until it is + * assigned or evaluated for some other reason (so the argument + * should not be changed in the meantime). It is the return type of + * MatrixBase::pow() and related functions and most of the + * time this is the only way it is used. + */ +template +class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerReturnValue > +{ + public: + typedef typename Derived::PlainObject PlainObject; + typedef typename std::complex ComplexScalar; + + /** + * \brief Constructor. + * + * \param[in] A %Matrix (expression), the base of the matrix power. + * \param[in] p complex scalar, the exponent of the matrix power. + */ + MatrixComplexPowerReturnValue(const Derived& A, const ComplexScalar& p) : m_A(A), m_p(p) + { } + + /** + * \brief Compute the matrix power. + * + * Because \p p is complex, \f$ A^p \f$ is simply evaluated as \f$ + * \exp(p \log(A)) \f$. + * + * \param[out] result \f$ A^p \f$ where \p A and \p p are as in the + * constructor. + */ + template + inline void evalTo(ResultType& result) const + { result = (m_p * m_A.log()).exp(); } + + Index rows() const { return m_A.rows(); } + Index cols() const { return m_A.cols(); } + + private: + const Derived& m_A; + const ComplexScalar m_p; +}; + +namespace internal { + +template +struct traits< MatrixPowerParenthesesReturnValue > +{ typedef typename MatrixPowerType::PlainObject ReturnType; }; + +template +struct traits< MatrixPowerReturnValue > +{ typedef typename Derived::PlainObject ReturnType; }; + +template +struct traits< MatrixComplexPowerReturnValue > +{ typedef typename Derived::PlainObject ReturnType; }; + +} + +template +const MatrixPowerReturnValue MatrixBase::pow(const RealScalar& p) const +{ return MatrixPowerReturnValue(derived(), p); } + +template +const MatrixComplexPowerReturnValue MatrixBase::pow(const std::complex& p) const +{ return MatrixComplexPowerReturnValue(derived(), p); } + +} // namespace Eigen + +#endif // EIGEN_MATRIX_POWER diff --git a/external/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/external/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h new file mode 100644 index 0000000..e363e77 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h @@ -0,0 +1,368 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011, 2013 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_SQUARE_ROOT +#define EIGEN_MATRIX_SQUARE_ROOT + +namespace Eigen { + +namespace internal { + +// pre: T.block(i,i,2,2) has complex conjugate eigenvalues +// post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2) +template +void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, Index i, ResultType& sqrtT) +{ + // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere + // in EigenSolver. If we expose it, we could call it directly from here. 
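+  // The 2x2 block has a complex conjugate eigenvalue pair, so its principal square root is real;
+  // it is formed below from the complex eigendecomposition V * diag(sqrt(lambda)) * V^(-1), and
+  // .real() merely drops the rounding-level imaginary residue.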
+ typedef typename traits::Scalar Scalar; + Matrix block = T.template block<2,2>(i,i); + EigenSolver > es(block); + sqrtT.template block<2,2>(i,i) + = (es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() * es.eigenvectors().inverse()).real(); +} + +// pre: block structure of T is such that (i,j) is a 1x1 block, +// all blocks of sqrtT to left of and below (i,j) are correct +// post: sqrtT(i,j) has the correct value +template +void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT) +{ + typedef typename traits::Scalar Scalar; + Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value(); + sqrtT.coeffRef(i,j) = (T.coeff(i,j) - tmp) / (sqrtT.coeff(i,i) + sqrtT.coeff(j,j)); +} + +// similar to compute1x1offDiagonalBlock() +template +void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT) +{ + typedef typename traits::Scalar Scalar; + Matrix rhs = T.template block<1,2>(i,j); + if (j-i > 1) + rhs -= sqrtT.block(i, i+1, 1, j-i-1) * sqrtT.block(i+1, j, j-i-1, 2); + Matrix A = sqrtT.coeff(i,i) * Matrix::Identity(); + A += sqrtT.template block<2,2>(j,j).transpose(); + sqrtT.template block<1,2>(i,j).transpose() = A.fullPivLu().solve(rhs.transpose()); +} + +// similar to compute1x1offDiagonalBlock() +template +void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT) +{ + typedef typename traits::Scalar Scalar; + Matrix rhs = T.template block<2,1>(i,j); + if (j-i > 2) + rhs -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 1); + Matrix A = sqrtT.coeff(j,j) * Matrix::Identity(); + A += sqrtT.template block<2,2>(i,i); + sqrtT.template block<2,1>(i,j) = A.fullPivLu().solve(rhs); +} + +// solves the equation A X + X B = C where all matrices are 2-by-2 +template +void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const MatrixType& A, const MatrixType& B, const MatrixType& C) +{ + typedef typename traits::Scalar Scalar; + Matrix coeffMatrix = Matrix::Zero(); + coeffMatrix.coeffRef(0,0) = A.coeff(0,0) + B.coeff(0,0); + coeffMatrix.coeffRef(1,1) = A.coeff(0,0) + B.coeff(1,1); + coeffMatrix.coeffRef(2,2) = A.coeff(1,1) + B.coeff(0,0); + coeffMatrix.coeffRef(3,3) = A.coeff(1,1) + B.coeff(1,1); + coeffMatrix.coeffRef(0,1) = B.coeff(1,0); + coeffMatrix.coeffRef(0,2) = A.coeff(0,1); + coeffMatrix.coeffRef(1,0) = B.coeff(0,1); + coeffMatrix.coeffRef(1,3) = A.coeff(0,1); + coeffMatrix.coeffRef(2,0) = A.coeff(1,0); + coeffMatrix.coeffRef(2,3) = B.coeff(1,0); + coeffMatrix.coeffRef(3,1) = A.coeff(1,0); + coeffMatrix.coeffRef(3,2) = B.coeff(0,1); + + Matrix rhs; + rhs.coeffRef(0) = C.coeff(0,0); + rhs.coeffRef(1) = C.coeff(0,1); + rhs.coeffRef(2) = C.coeff(1,0); + rhs.coeffRef(3) = C.coeff(1,1); + + Matrix result; + result = coeffMatrix.fullPivLu().solve(rhs); + + X.coeffRef(0,0) = result.coeff(0); + X.coeffRef(0,1) = result.coeff(1); + X.coeffRef(1,0) = result.coeff(2); + X.coeffRef(1,1) = result.coeff(3); +} + +// similar to compute1x1offDiagonalBlock() +template +void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT) +{ + typedef typename traits::Scalar Scalar; + Matrix A = sqrtT.template block<2,2>(i,i); + Matrix B = sqrtT.template block<2,2>(j,j); + Matrix C = T.template block<2,2>(i,j); + if (j-i > 2) + C -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 2); + Matrix X; + 
matrix_sqrt_quasi_triangular_solve_auxiliary_equation(X, A, B, C); + sqrtT.template block<2,2>(i,j) = X; +} + +// pre: T is quasi-upper-triangular and sqrtT is a zero matrix of the same size +// post: the diagonal blocks of sqrtT are the square roots of the diagonal blocks of T +template +void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT) +{ + using std::sqrt; + const Index size = T.rows(); + for (Index i = 0; i < size; i++) { + if (i == size - 1 || T.coeff(i+1, i) == 0) { + eigen_assert(T(i,i) >= 0); + sqrtT.coeffRef(i,i) = sqrt(T.coeff(i,i)); + } + else { + matrix_sqrt_quasi_triangular_2x2_diagonal_block(T, i, sqrtT); + ++i; + } + } +} + +// pre: T is quasi-upper-triangular and diagonal blocks of sqrtT are square root of diagonal blocks of T. +// post: sqrtT is the square root of T. +template +void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT) +{ + const Index size = T.rows(); + for (Index j = 1; j < size; j++) { + if (T.coeff(j, j-1) != 0) // if T(j-1:j, j-1:j) is a 2-by-2 block + continue; + for (Index i = j-1; i >= 0; i--) { + if (i > 0 && T.coeff(i, i-1) != 0) // if T(i-1:i, i-1:i) is a 2-by-2 block + continue; + bool iBlockIs2x2 = (i < size - 1) && (T.coeff(i+1, i) != 0); + bool jBlockIs2x2 = (j < size - 1) && (T.coeff(j+1, j) != 0); + if (iBlockIs2x2 && jBlockIs2x2) + matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(T, i, j, sqrtT); + else if (iBlockIs2x2 && !jBlockIs2x2) + matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(T, i, j, sqrtT); + else if (!iBlockIs2x2 && jBlockIs2x2) + matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(T, i, j, sqrtT); + else if (!iBlockIs2x2 && !jBlockIs2x2) + matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(T, i, j, sqrtT); + } + } +} + +} // end of namespace internal + +/** \ingroup MatrixFunctions_Module + * \brief Compute matrix square root of quasi-triangular matrix. + * + * \tparam MatrixType type of \p arg, the argument of matrix square root, + * expected to be an instantiation of the Matrix class template. + * \tparam ResultType type of \p result, where result is to be stored. + * \param[in] arg argument of matrix square root. + * \param[out] result matrix square root of upper Hessenberg part of \p arg. + * + * This function computes the square root of the upper quasi-triangular matrix stored in the upper + * Hessenberg part of \p arg. Only the upper Hessenberg part of \p result is updated, the rest is + * not touched. See MatrixBase::sqrt() for details on how this computation is implemented. + * + * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular + */ +template +void matrix_sqrt_quasi_triangular(const MatrixType &arg, ResultType &result) +{ + eigen_assert(arg.rows() == arg.cols()); + result.resize(arg.rows(), arg.cols()); + internal::matrix_sqrt_quasi_triangular_diagonal(arg, result); + internal::matrix_sqrt_quasi_triangular_off_diagonal(arg, result); +} + + +/** \ingroup MatrixFunctions_Module + * \brief Compute matrix square root of triangular matrix. + * + * \tparam MatrixType type of \p arg, the argument of matrix square root, + * expected to be an instantiation of the Matrix class template. + * \tparam ResultType type of \p result, where result is to be stored. + * \param[in] arg argument of matrix square root. + * \param[out] result matrix square root of upper triangular part of \p arg. + * + * Only the upper triangular part (including the diagonal) of \p result is updated, the rest is not + * touched. 
See MatrixBase::sqrt() for details on how this computation is implemented. + * + * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular + */ +template +void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result) +{ + using std::sqrt; + typedef typename MatrixType::Scalar Scalar; + + eigen_assert(arg.rows() == arg.cols()); + + // Compute square root of arg and store it in upper triangular part of result + // This uses that the square root of triangular matrices can be computed directly. + result.resize(arg.rows(), arg.cols()); + for (Index i = 0; i < arg.rows(); i++) { + result.coeffRef(i,i) = sqrt(arg.coeff(i,i)); + } + for (Index j = 1; j < arg.cols(); j++) { + for (Index i = j-1; i >= 0; i--) { + // if i = j-1, then segment has length 0 so tmp = 0 + Scalar tmp = (result.row(i).segment(i+1,j-i-1) * result.col(j).segment(i+1,j-i-1)).value(); + // denominator may be zero if original matrix is singular + result.coeffRef(i,j) = (arg.coeff(i,j) - tmp) / (result.coeff(i,i) + result.coeff(j,j)); + } + } +} + + +namespace internal { + +/** \ingroup MatrixFunctions_Module + * \brief Helper struct for computing matrix square roots of general matrices. + * \tparam MatrixType type of the argument of the matrix square root, + * expected to be an instantiation of the Matrix class template. + * + * \sa MatrixSquareRootTriangular, MatrixSquareRootQuasiTriangular, MatrixBase::sqrt() + */ +template ::Scalar>::IsComplex> +struct matrix_sqrt_compute +{ + /** \brief Compute the matrix square root + * + * \param[in] arg matrix whose square root is to be computed. + * \param[out] result square root of \p arg. + * + * See MatrixBase::sqrt() for details on how this computation is implemented. + */ + template static void run(const MatrixType &arg, ResultType &result); +}; + + +// ********** Partial specialization for real matrices ********** + +template +struct matrix_sqrt_compute +{ + typedef typename MatrixType::PlainObject PlainType; + template + static void run(const MatrixType &arg, ResultType &result) + { + eigen_assert(arg.rows() == arg.cols()); + + // Compute Schur decomposition of arg + const RealSchur schurOfA(arg); + const PlainType& T = schurOfA.matrixT(); + const PlainType& U = schurOfA.matrixU(); + + // Compute square root of T + PlainType sqrtT = PlainType::Zero(arg.rows(), arg.cols()); + matrix_sqrt_quasi_triangular(T, sqrtT); + + // Compute square root of arg + result = U * sqrtT * U.adjoint(); + } +}; + + +// ********** Partial specialization for complex matrices ********** + +template +struct matrix_sqrt_compute +{ + typedef typename MatrixType::PlainObject PlainType; + template + static void run(const MatrixType &arg, ResultType &result) + { + eigen_assert(arg.rows() == arg.cols()); + + // Compute Schur decomposition of arg + const ComplexSchur schurOfA(arg); + const PlainType& T = schurOfA.matrixT(); + const PlainType& U = schurOfA.matrixU(); + + // Compute square root of T + PlainType sqrtT; + matrix_sqrt_triangular(T, sqrtT); + + // Compute square root of arg + result = U * (sqrtT.template triangularView() * U.adjoint()); + } +}; + +} // end namespace internal + +/** \ingroup MatrixFunctions_Module + * + * \brief Proxy for the matrix square root of some matrix (expression). + * + * \tparam Derived Type of the argument to the matrix square root. + * + * This class holds the argument to the matrix square root until it + * is assigned or evaluated for some other reason (so the argument + * should not be changed in the meantime). 
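+ *
+ * A minimal usage sketch through MatrixBase::sqrt() (illustrative only, assuming
+ * <unsupported/Eigen/MatrixFunctions> is included; the argument is made positive definite so
+ * that a real square root exists):
+ * \code
+ * #include <unsupported/Eigen/MatrixFunctions>
+ * #include <iostream>
+ *
+ * int main()
+ * {
+ *   Eigen::Matrix3d X = Eigen::Matrix3d::Random();
+ *   Eigen::Matrix3d A = X * X.transpose() + Eigen::Matrix3d::Identity();
+ *   Eigen::Matrix3d S = A.sqrt();
+ *   std::cout << (S * S - A).norm() << std::endl;  // should be ~0
+ * }
+ * \endcode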
It is the return type of + * MatrixBase::sqrt() and most of the time this is the only way it is + * used. + */ +template class MatrixSquareRootReturnValue +: public ReturnByValue > +{ + protected: + typedef typename internal::ref_selector::type DerivedNested; + + public: + /** \brief Constructor. + * + * \param[in] src %Matrix (expression) forming the argument of the + * matrix square root. + */ + explicit MatrixSquareRootReturnValue(const Derived& src) : m_src(src) { } + + /** \brief Compute the matrix square root. + * + * \param[out] result the matrix square root of \p src in the + * constructor. + */ + template + inline void evalTo(ResultType& result) const + { + typedef typename internal::nested_eval::type DerivedEvalType; + typedef typename internal::remove_all::type DerivedEvalTypeClean; + DerivedEvalType tmp(m_src); + internal::matrix_sqrt_compute::run(tmp, result); + } + + Index rows() const { return m_src.rows(); } + Index cols() const { return m_src.cols(); } + + protected: + const DerivedNested m_src; +}; + +namespace internal { +template +struct traits > +{ + typedef typename Derived::PlainObject ReturnType; +}; +} + +template +const MatrixSquareRootReturnValue MatrixBase::sqrt() const +{ + eigen_assert(rows() == cols()); + return MatrixSquareRootReturnValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_FUNCTION diff --git a/external/unsupported/Eigen/src/MatrixFunctions/StemFunction.h b/external/unsupported/Eigen/src/MatrixFunctions/StemFunction.h new file mode 100644 index 0000000..7604df9 --- /dev/null +++ b/external/unsupported/Eigen/src/MatrixFunctions/StemFunction.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010, 2013 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STEM_FUNCTION +#define EIGEN_STEM_FUNCTION + +namespace Eigen { + +namespace internal { + +/** \brief The exponential function (and its derivatives). */ +template +Scalar stem_function_exp(Scalar x, int) +{ + using std::exp; + return exp(x); +} + +/** \brief Cosine (and its derivatives). */ +template +Scalar stem_function_cos(Scalar x, int n) +{ + using std::cos; + using std::sin; + Scalar res; + + switch (n % 4) { + case 0: + res = std::cos(x); + break; + case 1: + res = -std::sin(x); + break; + case 2: + res = -std::cos(x); + break; + case 3: + res = std::sin(x); + break; + } + return res; +} + +/** \brief Sine (and its derivatives). */ +template +Scalar stem_function_sin(Scalar x, int n) +{ + using std::cos; + using std::sin; + Scalar res; + + switch (n % 4) { + case 0: + res = std::sin(x); + break; + case 1: + res = std::cos(x); + break; + case 2: + res = -std::sin(x); + break; + case 3: + res = -std::cos(x); + break; + } + return res; +} + +/** \brief Hyperbolic cosine (and its derivatives). */ +template +Scalar stem_function_cosh(Scalar x, int n) +{ + using std::cosh; + using std::sinh; + Scalar res; + + switch (n % 2) { + case 0: + res = std::cosh(x); + break; + case 1: + res = std::sinh(x); + break; + } + return res; +} + +/** \brief Hyperbolic sine (and its derivatives). 
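+ *
+ * The scalar stem functions in this file back MatrixBase::sinh() and MatrixBase::cosh(). A small
+ * sanity-check sketch (illustrative only, assuming <unsupported/Eigen/MatrixFunctions> is
+ * included): for any square matrix A, cosh(A)*cosh(A) - sinh(A)*sinh(A) is the identity.
+ * \code
+ * #include <unsupported/Eigen/MatrixFunctions>
+ * #include <iostream>
+ *
+ * int main()
+ * {
+ *   Eigen::Matrix2d A;
+ *   A << 0, 1,
+ *        1, 0;
+ *   Eigen::Matrix2d C = A.cosh();
+ *   Eigen::Matrix2d S = A.sinh();
+ *   std::cout << C * C - S * S << std::endl;  // identity, up to rounding
+ * }
+ * \endcode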
*/ +template +Scalar stem_function_sinh(Scalar x, int n) +{ + using std::cosh; + using std::sinh; + Scalar res; + + switch (n % 2) { + case 0: + res = std::sinh(x); + break; + case 1: + res = std::cosh(x); + break; + } + return res; +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_STEM_FUNCTION diff --git a/external/unsupported/Eigen/src/MoreVectorization/MathFunctions.h b/external/unsupported/Eigen/src/MoreVectorization/MathFunctions.h new file mode 100644 index 0000000..63cb28d --- /dev/null +++ b/external/unsupported/Eigen/src/MoreVectorization/MathFunctions.h @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Rohit Garg +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H +#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the arcsin of \a a (coeff-wise) */ +template inline static Packet pasin(Packet a) { return std::asin(a); } + +#ifdef EIGEN_VECTORIZE_SSE + +template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x) +{ + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); + _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5); + _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); + + _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654); + _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5); + + _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2); + _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2); + _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2); + _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2); + _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1); + + Packet4f a = pabs(x);//got the absolute value + + Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit + + Packet4f z1,z2;//will need them during computation + + +//will compute the two branches for asin +//so first compare with half + + Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take +//both will be taken, and finally results will be merged +//the branch for values >0.5 + + { +//the core series expansion + z1=pmadd(p4f_minus_half,a,p4f_half); + Packet4f x1=psqrt(z1); + Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2); + Packet4f s2=pmadd(s1, z1, p4f_asin3); + Packet4f s3=pmadd(s2,z1, p4f_asin4); + Packet4f s4=pmadd(s3,z1, p4f_asin5); + Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd + z1=pmadd(temp,x1,x1); + z1=padd(z1,z1); + z1=psub(p4f_pi_over_2,z1); + } + + { +//the core series expansion + Packet4f x2=a; + z2=pmul(x2,x2); + Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2); + Packet4f s2=pmadd(s1, z2, p4f_asin3); + Packet4f s3=pmadd(s2,z2, p4f_asin4); + Packet4f s4=pmadd(s3,z2, p4f_asin5); + Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd + z2=pmadd(temp,x2,x2); + } + +/* select the correct result from the two branch evaluations */ + z1 = _mm_and_ps(branch_mask, z1); + z2 = _mm_andnot_ps(branch_mask, z2); + Packet4f z = _mm_or_ps(z1,z2); + +/* update the sign */ + return _mm_xor_ps(z, sign_bit); +} + +#endif // EIGEN_VECTORIZE_SSE + +} // end namespace internal + +} // end namespace Eigen + +#endif // 
EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/external/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h new file mode 100644 index 0000000..07c5ef0 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h @@ -0,0 +1,601 @@ +// -*- coding: utf-8 +// vim: set fileencoding=utf-8 + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_HYBRIDNONLINEARSOLVER_H +#define EIGEN_HYBRIDNONLINEARSOLVER_H + +namespace Eigen { + +namespace HybridNonLinearSolverSpace { + enum Status { + Running = -1, + ImproperInputParameters = 0, + RelativeErrorTooSmall = 1, + TooManyFunctionEvaluation = 2, + TolTooSmall = 3, + NotMakingProgressJacobian = 4, + NotMakingProgressIterations = 5, + UserAsked = 6 + }; +} + +/** + * \ingroup NonLinearOptimization_Module + * \brief Finds a zero of a system of n + * nonlinear functions in n variables by a modification of the Powell + * hybrid method ("dogleg"). + * + * The user must provide a subroutine which calculates the + * functions. The Jacobian is either provided by the user, or approximated + * using a forward-difference method. + * + */ +template +class HybridNonLinearSolver +{ +public: + typedef DenseIndex Index; + + HybridNonLinearSolver(FunctorType &_functor) + : functor(_functor) { nfev=njev=iter = 0; fnorm= 0.; useExternalScaling=false;} + + struct Parameters { + Parameters() + : factor(Scalar(100.)) + , maxfev(1000) + , xtol(numext::sqrt(NumTraits::epsilon())) + , nb_of_subdiagonals(-1) + , nb_of_superdiagonals(-1) + , epsfcn(Scalar(0.)) {} + Scalar factor; + Index maxfev; // maximum number of function evaluation + Scalar xtol; + Index nb_of_subdiagonals; + Index nb_of_superdiagonals; + Scalar epsfcn; + }; + typedef Matrix< Scalar, Dynamic, 1 > FVectorType; + typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType; + /* TODO: if eigen provides a triangular storage, use it here */ + typedef Matrix< Scalar, Dynamic, Dynamic > UpperTriangularType; + + HybridNonLinearSolverSpace::Status hybrj1( + FVectorType &x, + const Scalar tol = numext::sqrt(NumTraits::epsilon()) + ); + + HybridNonLinearSolverSpace::Status solveInit(FVectorType &x); + HybridNonLinearSolverSpace::Status solveOneStep(FVectorType &x); + HybridNonLinearSolverSpace::Status solve(FVectorType &x); + + HybridNonLinearSolverSpace::Status hybrd1( + FVectorType &x, + const Scalar tol = numext::sqrt(NumTraits::epsilon()) + ); + + HybridNonLinearSolverSpace::Status solveNumericalDiffInit(FVectorType &x); + HybridNonLinearSolverSpace::Status solveNumericalDiffOneStep(FVectorType &x); + HybridNonLinearSolverSpace::Status solveNumericalDiff(FVectorType &x); + + void resetParameters(void) { parameters = Parameters(); } + Parameters parameters; + FVectorType fvec, qtf, diag; + JacobianType fjac; + UpperTriangularType R; + Index nfev; + Index njev; + Index iter; + Scalar fnorm; + bool useExternalScaling; +private: + FunctorType &functor; + Index n; + Scalar sum; + bool sing; + Scalar temp; + Scalar delta; + bool jeval; + Index ncsuc; + Scalar ratio; + Scalar pnorm, xnorm, fnorm1; + Index nslow1, nslow2; + Index ncfail; + Scalar actred, prered; + FVectorType wa1, wa2, wa3, 
wa4; + + HybridNonLinearSolver& operator=(const HybridNonLinearSolver&); +}; + + + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::hybrj1( + FVectorType &x, + const Scalar tol + ) +{ + n = x.size(); + + /* check the input parameters for errors. */ + if (n <= 0 || tol < 0.) + return HybridNonLinearSolverSpace::ImproperInputParameters; + + resetParameters(); + parameters.maxfev = 100*(n+1); + parameters.xtol = tol; + diag.setConstant(n, 1.); + useExternalScaling = true; + return solve(x); +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solveInit(FVectorType &x) +{ + n = x.size(); + + wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n); + fvec.resize(n); + qtf.resize(n); + fjac.resize(n, n); + if (!useExternalScaling) + diag.resize(n); + eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); + + /* Function Body */ + nfev = 0; + njev = 0; + + /* check the input parameters for errors. */ + if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0. ) + return HybridNonLinearSolverSpace::ImproperInputParameters; + if (useExternalScaling) + for (Index j = 0; j < n; ++j) + if (diag[j] <= 0.) + return HybridNonLinearSolverSpace::ImproperInputParameters; + + /* evaluate the function at the starting point */ + /* and calculate its norm. */ + nfev = 1; + if ( functor(x, fvec) < 0) + return HybridNonLinearSolverSpace::UserAsked; + fnorm = fvec.stableNorm(); + + /* initialize iteration counter and monitors. */ + iter = 1; + ncsuc = 0; + ncfail = 0; + nslow1 = 0; + nslow2 = 0; + + return HybridNonLinearSolverSpace::Running; +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solveOneStep(FVectorType &x) +{ + using std::abs; + + eigen_assert(x.size()==n); // check the caller is not cheating us + + Index j; + std::vector > v_givens(n), w_givens(n); + + jeval = true; + + /* calculate the jacobian matrix. */ + if ( functor.df(x, fjac) < 0) + return HybridNonLinearSolverSpace::UserAsked; + ++njev; + + wa2 = fjac.colwise().blueNorm(); + + /* on the first iteration and if external scaling is not used, scale according */ + /* to the norms of the columns of the initial jacobian. */ + if (iter == 1) { + if (!useExternalScaling) + for (j = 0; j < n; ++j) + diag[j] = (wa2[j]==0.) ? 1. : wa2[j]; + + /* on the first iteration, calculate the norm of the scaled x */ + /* and initialize the step bound delta. */ + xnorm = diag.cwiseProduct(x).stableNorm(); + delta = parameters.factor * xnorm; + if (delta == 0.) + delta = parameters.factor; + } + + /* compute the qr factorization of the jacobian. */ + HouseholderQR qrfac(fjac); // no pivoting: + + /* copy the triangular factor of the qr factorization into r. */ + R = qrfac.matrixQR(); + + /* accumulate the orthogonal factor in fjac. */ + fjac = qrfac.householderQ(); + + /* form (q transpose)*fvec and store in qtf. */ + qtf = fjac.transpose() * fvec; + + /* rescale if necessary. */ + if (!useExternalScaling) + diag = diag.cwiseMax(wa2); + + while (true) { + /* determine the direction p. */ + internal::dogleg(R, diag, qtf, delta, wa1); + + /* store the direction p and x + p. calculate the norm of p. */ + wa1 = -wa1; + wa2 = x + wa1; + pnorm = diag.cwiseProduct(wa1).stableNorm(); + + /* on the first iteration, adjust the initial step bound. */ + if (iter == 1) + delta = (std::min)(delta,pnorm); + + /* evaluate the function at x + p and calculate its norm. 
*/ + if ( functor(wa2, wa4) < 0) + return HybridNonLinearSolverSpace::UserAsked; + ++nfev; + fnorm1 = wa4.stableNorm(); + + /* compute the scaled actual reduction. */ + actred = -1.; + if (fnorm1 < fnorm) /* Computing 2nd power */ + actred = 1. - numext::abs2(fnorm1 / fnorm); + + /* compute the scaled predicted reduction. */ + wa3 = R.template triangularView()*wa1 + qtf; + temp = wa3.stableNorm(); + prered = 0.; + if (temp < fnorm) /* Computing 2nd power */ + prered = 1. - numext::abs2(temp / fnorm); + + /* compute the ratio of the actual to the predicted reduction. */ + ratio = 0.; + if (prered > 0.) + ratio = actred / prered; + + /* update the step bound. */ + if (ratio < Scalar(.1)) { + ncsuc = 0; + ++ncfail; + delta = Scalar(.5) * delta; + } else { + ncfail = 0; + ++ncsuc; + if (ratio >= Scalar(.5) || ncsuc > 1) + delta = (std::max)(delta, pnorm / Scalar(.5)); + if (abs(ratio - 1.) <= Scalar(.1)) { + delta = pnorm / Scalar(.5); + } + } + + /* test for successful iteration. */ + if (ratio >= Scalar(1e-4)) { + /* successful iteration. update x, fvec, and their norms. */ + x = wa2; + wa2 = diag.cwiseProduct(x); + fvec = wa4; + xnorm = wa2.stableNorm(); + fnorm = fnorm1; + ++iter; + } + + /* determine the progress of the iteration. */ + ++nslow1; + if (actred >= Scalar(.001)) + nslow1 = 0; + if (jeval) + ++nslow2; + if (actred >= Scalar(.1)) + nslow2 = 0; + + /* test for convergence. */ + if (delta <= parameters.xtol * xnorm || fnorm == 0.) + return HybridNonLinearSolverSpace::RelativeErrorTooSmall; + + /* tests for termination and stringent tolerances. */ + if (nfev >= parameters.maxfev) + return HybridNonLinearSolverSpace::TooManyFunctionEvaluation; + if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits::epsilon() * xnorm) + return HybridNonLinearSolverSpace::TolTooSmall; + if (nslow2 == 5) + return HybridNonLinearSolverSpace::NotMakingProgressJacobian; + if (nslow1 == 10) + return HybridNonLinearSolverSpace::NotMakingProgressIterations; + + /* criterion for recalculating jacobian. */ + if (ncfail == 2) + break; // leave inner loop and go for the next outer loop iteration + + /* calculate the rank one modification to the jacobian */ + /* and update qtf if necessary. */ + wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm ); + wa2 = fjac.transpose() * wa4; + if (ratio >= Scalar(1e-4)) + qtf = wa2; + wa2 = (wa2-wa3)/pnorm; + + /* compute the qr factorization of the updated jacobian. */ + internal::r1updt(R, wa1, v_givens, w_givens, wa2, wa3, &sing); + internal::r1mpyq(n, n, fjac.data(), v_givens, w_givens); + internal::r1mpyq(1, n, qtf.data(), v_givens, w_givens); + + jeval = false; + } + return HybridNonLinearSolverSpace::Running; +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solve(FVectorType &x) +{ + HybridNonLinearSolverSpace::Status status = solveInit(x); + if (status==HybridNonLinearSolverSpace::ImproperInputParameters) + return status; + while (status==HybridNonLinearSolverSpace::Running) + status = solveOneStep(x); + return status; +} + + + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::hybrd1( + FVectorType &x, + const Scalar tol + ) +{ + n = x.size(); + + /* check the input parameters for errors. */ + if (n <= 0 || tol < 0.) 
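+    // A minimal usage sketch for this solver (illustrative only; "MyFunctor" and the
+    // example system are hypothetical, not part of this file). The functor only needs
+    // an int operator()(x, fvec) filling n residuals for n unknowns; a negative return
+    // value aborts the solve (UserAsked). hybrd1()/solveNumericalDiff() approximate the
+    // Jacobian by forward differences, so no df() is required:
+    //
+    //   struct MyFunctor {
+    //     int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const {
+    //       fvec(0) = x(0) * x(0) + x(1) - 11.0;
+    //       fvec(1) = x(0) + x(1) * x(1) - 7.0;
+    //       return 0;
+    //     }
+    //   };
+    //
+    //   MyFunctor func;
+    //   Eigen::HybridNonLinearSolver<MyFunctor, double> solver(func);
+    //   Eigen::VectorXd x(2);
+    //   x << 1.0, 1.0;
+    //   HybridNonLinearSolverSpace::Status status = solver.hybrd1(x);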
+ return HybridNonLinearSolverSpace::ImproperInputParameters; + + resetParameters(); + parameters.maxfev = 200*(n+1); + parameters.xtol = tol; + + diag.setConstant(n, 1.); + useExternalScaling = true; + return solveNumericalDiff(x); +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solveNumericalDiffInit(FVectorType &x) +{ + n = x.size(); + + if (parameters.nb_of_subdiagonals<0) parameters.nb_of_subdiagonals= n-1; + if (parameters.nb_of_superdiagonals<0) parameters.nb_of_superdiagonals= n-1; + + wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n); + qtf.resize(n); + fjac.resize(n, n); + fvec.resize(n); + if (!useExternalScaling) + diag.resize(n); + eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); + + /* Function Body */ + nfev = 0; + njev = 0; + + /* check the input parameters for errors. */ + if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.nb_of_subdiagonals< 0 || parameters.nb_of_superdiagonals< 0 || parameters.factor <= 0. ) + return HybridNonLinearSolverSpace::ImproperInputParameters; + if (useExternalScaling) + for (Index j = 0; j < n; ++j) + if (diag[j] <= 0.) + return HybridNonLinearSolverSpace::ImproperInputParameters; + + /* evaluate the function at the starting point */ + /* and calculate its norm. */ + nfev = 1; + if ( functor(x, fvec) < 0) + return HybridNonLinearSolverSpace::UserAsked; + fnorm = fvec.stableNorm(); + + /* initialize iteration counter and monitors. */ + iter = 1; + ncsuc = 0; + ncfail = 0; + nslow1 = 0; + nslow2 = 0; + + return HybridNonLinearSolverSpace::Running; +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solveNumericalDiffOneStep(FVectorType &x) +{ + using std::sqrt; + using std::abs; + + assert(x.size()==n); // check the caller is not cheating us + + Index j; + std::vector > v_givens(n), w_givens(n); + + jeval = true; + if (parameters.nb_of_subdiagonals<0) parameters.nb_of_subdiagonals= n-1; + if (parameters.nb_of_superdiagonals<0) parameters.nb_of_superdiagonals= n-1; + + /* calculate the jacobian matrix. */ + if (internal::fdjac1(functor, x, fvec, fjac, parameters.nb_of_subdiagonals, parameters.nb_of_superdiagonals, parameters.epsfcn) <0) + return HybridNonLinearSolverSpace::UserAsked; + nfev += (std::min)(parameters.nb_of_subdiagonals+parameters.nb_of_superdiagonals+ 1, n); + + wa2 = fjac.colwise().blueNorm(); + + /* on the first iteration and if external scaling is not used, scale according */ + /* to the norms of the columns of the initial jacobian. */ + if (iter == 1) { + if (!useExternalScaling) + for (j = 0; j < n; ++j) + diag[j] = (wa2[j]==0.) ? 1. : wa2[j]; + + /* on the first iteration, calculate the norm of the scaled x */ + /* and initialize the step bound delta. */ + xnorm = diag.cwiseProduct(x).stableNorm(); + delta = parameters.factor * xnorm; + if (delta == 0.) + delta = parameters.factor; + } + + /* compute the qr factorization of the jacobian. */ + HouseholderQR qrfac(fjac); // no pivoting: + + /* copy the triangular factor of the qr factorization into r. */ + R = qrfac.matrixQR(); + + /* accumulate the orthogonal factor in fjac. */ + fjac = qrfac.householderQ(); + + /* form (q transpose)*fvec and store in qtf. */ + qtf = fjac.transpose() * fvec; + + /* rescale if necessary. */ + if (!useExternalScaling) + diag = diag.cwiseMax(wa2); + + while (true) { + /* determine the direction p. */ + internal::dogleg(R, diag, qtf, delta, wa1); + + /* store the direction p and x + p. 
calculate the norm of p. */ + wa1 = -wa1; + wa2 = x + wa1; + pnorm = diag.cwiseProduct(wa1).stableNorm(); + + /* on the first iteration, adjust the initial step bound. */ + if (iter == 1) + delta = (std::min)(delta,pnorm); + + /* evaluate the function at x + p and calculate its norm. */ + if ( functor(wa2, wa4) < 0) + return HybridNonLinearSolverSpace::UserAsked; + ++nfev; + fnorm1 = wa4.stableNorm(); + + /* compute the scaled actual reduction. */ + actred = -1.; + if (fnorm1 < fnorm) /* Computing 2nd power */ + actred = 1. - numext::abs2(fnorm1 / fnorm); + + /* compute the scaled predicted reduction. */ + wa3 = R.template triangularView()*wa1 + qtf; + temp = wa3.stableNorm(); + prered = 0.; + if (temp < fnorm) /* Computing 2nd power */ + prered = 1. - numext::abs2(temp / fnorm); + + /* compute the ratio of the actual to the predicted reduction. */ + ratio = 0.; + if (prered > 0.) + ratio = actred / prered; + + /* update the step bound. */ + if (ratio < Scalar(.1)) { + ncsuc = 0; + ++ncfail; + delta = Scalar(.5) * delta; + } else { + ncfail = 0; + ++ncsuc; + if (ratio >= Scalar(.5) || ncsuc > 1) + delta = (std::max)(delta, pnorm / Scalar(.5)); + if (abs(ratio - 1.) <= Scalar(.1)) { + delta = pnorm / Scalar(.5); + } + } + + /* test for successful iteration. */ + if (ratio >= Scalar(1e-4)) { + /* successful iteration. update x, fvec, and their norms. */ + x = wa2; + wa2 = diag.cwiseProduct(x); + fvec = wa4; + xnorm = wa2.stableNorm(); + fnorm = fnorm1; + ++iter; + } + + /* determine the progress of the iteration. */ + ++nslow1; + if (actred >= Scalar(.001)) + nslow1 = 0; + if (jeval) + ++nslow2; + if (actred >= Scalar(.1)) + nslow2 = 0; + + /* test for convergence. */ + if (delta <= parameters.xtol * xnorm || fnorm == 0.) + return HybridNonLinearSolverSpace::RelativeErrorTooSmall; + + /* tests for termination and stringent tolerances. */ + if (nfev >= parameters.maxfev) + return HybridNonLinearSolverSpace::TooManyFunctionEvaluation; + if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits::epsilon() * xnorm) + return HybridNonLinearSolverSpace::TolTooSmall; + if (nslow2 == 5) + return HybridNonLinearSolverSpace::NotMakingProgressJacobian; + if (nslow1 == 10) + return HybridNonLinearSolverSpace::NotMakingProgressIterations; + + /* criterion for recalculating jacobian. */ + if (ncfail == 2) + break; // leave inner loop and go for the next outer loop iteration + + /* calculate the rank one modification to the jacobian */ + /* and update qtf if necessary. */ + wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm ); + wa2 = fjac.transpose() * wa4; + if (ratio >= Scalar(1e-4)) + qtf = wa2; + wa2 = (wa2-wa3)/pnorm; + + /* compute the qr factorization of the updated jacobian. 
*/ + internal::r1updt(R, wa1, v_givens, w_givens, wa2, wa3, &sing); + internal::r1mpyq(n, n, fjac.data(), v_givens, w_givens); + internal::r1mpyq(1, n, qtf.data(), v_givens, w_givens); + + jeval = false; + } + return HybridNonLinearSolverSpace::Running; +} + +template +HybridNonLinearSolverSpace::Status +HybridNonLinearSolver::solveNumericalDiff(FVectorType &x) +{ + HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x); + if (status==HybridNonLinearSolverSpace::ImproperInputParameters) + return status; + while (status==HybridNonLinearSolverSpace::Running) + status = solveNumericalDiffOneStep(x); + return status; +} + +} // end namespace Eigen + +#endif // EIGEN_HYBRIDNONLINEARSOLVER_H + +//vim: ai ts=4 sts=4 et sw=4 diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/external/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h new file mode 100644 index 0000000..fe3b79c --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h @@ -0,0 +1,657 @@ +// -*- coding: utf-8 +// vim: set fileencoding=utf-8 + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LEVENBERGMARQUARDT__H +#define EIGEN_LEVENBERGMARQUARDT__H + +namespace Eigen { + +namespace LevenbergMarquardtSpace { + enum Status { + NotStarted = -2, + Running = -1, + ImproperInputParameters = 0, + RelativeReductionTooSmall = 1, + RelativeErrorTooSmall = 2, + RelativeErrorAndReductionTooSmall = 3, + CosinusTooSmall = 4, + TooManyFunctionEvaluation = 5, + FtolTooSmall = 6, + XtolTooSmall = 7, + GtolTooSmall = 8, + UserAsked = 9 + }; +} + + + +/** + * \ingroup NonLinearOptimization_Module + * \brief Performs non linear optimization over a non-linear function, + * using a variant of the Levenberg Marquardt algorithm. + * + * Check wikipedia for more information. 
+ * http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm + */ +template +class LevenbergMarquardt +{ + static Scalar sqrt_epsilon() + { + using std::sqrt; + return sqrt(NumTraits::epsilon()); + } + +public: + LevenbergMarquardt(FunctorType &_functor) + : functor(_functor) { nfev = njev = iter = 0; fnorm = gnorm = 0.; useExternalScaling=false; } + + typedef DenseIndex Index; + + struct Parameters { + Parameters() + : factor(Scalar(100.)) + , maxfev(400) + , ftol(sqrt_epsilon()) + , xtol(sqrt_epsilon()) + , gtol(Scalar(0.)) + , epsfcn(Scalar(0.)) {} + Scalar factor; + Index maxfev; // maximum number of function evaluation + Scalar ftol; + Scalar xtol; + Scalar gtol; + Scalar epsfcn; + }; + + typedef Matrix< Scalar, Dynamic, 1 > FVectorType; + typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType; + + LevenbergMarquardtSpace::Status lmder1( + FVectorType &x, + const Scalar tol = sqrt_epsilon() + ); + + LevenbergMarquardtSpace::Status minimize(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeInit(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeOneStep(FVectorType &x); + + static LevenbergMarquardtSpace::Status lmdif1( + FunctorType &functor, + FVectorType &x, + Index *nfev, + const Scalar tol = sqrt_epsilon() + ); + + LevenbergMarquardtSpace::Status lmstr1( + FVectorType &x, + const Scalar tol = sqrt_epsilon() + ); + + LevenbergMarquardtSpace::Status minimizeOptimumStorage(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeOptimumStorageInit(FVectorType &x); + LevenbergMarquardtSpace::Status minimizeOptimumStorageOneStep(FVectorType &x); + + void resetParameters(void) { parameters = Parameters(); } + + Parameters parameters; + FVectorType fvec, qtf, diag; + JacobianType fjac; + PermutationMatrix permutation; + Index nfev; + Index njev; + Index iter; + Scalar fnorm, gnorm; + bool useExternalScaling; + + Scalar lm_param(void) { return par; } +private: + + FunctorType &functor; + Index n; + Index m; + FVectorType wa1, wa2, wa3, wa4; + + Scalar par, sum; + Scalar temp, temp1, temp2; + Scalar delta; + Scalar ratio; + Scalar pnorm, xnorm, fnorm1, actred, dirder, prered; + + LevenbergMarquardt& operator=(const LevenbergMarquardt&); +}; + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::lmder1( + FVectorType &x, + const Scalar tol + ) +{ + n = x.size(); + m = functor.values(); + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || tol < 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + resetParameters(); + parameters.ftol = tol; + parameters.xtol = tol; + parameters.maxfev = 100*(n+1); + + return minimize(x); +} + + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimize(FVectorType &x) +{ + LevenbergMarquardtSpace::Status status = minimizeInit(x); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) + return status; + do { + status = minimizeOneStep(x); + } while (status==LevenbergMarquardtSpace::Running); + return status; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeInit(FVectorType &x) +{ + n = x.size(); + m = functor.values(); + + wa1.resize(n); wa2.resize(n); wa3.resize(n); + wa4.resize(m); + fvec.resize(m); + fjac.resize(m, n); + if (!useExternalScaling) + diag.resize(n); + eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); + qtf.resize(n); + + /* Function Body */ + nfev = 0; + njev = 0; + + /* check the input parameters for errors. 
*/ + if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + if (useExternalScaling) + for (Index j = 0; j < n; ++j) + if (diag[j] <= 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + /* evaluate the function at the starting point */ + /* and calculate its norm. */ + nfev = 1; + if ( functor(x, fvec) < 0) + return LevenbergMarquardtSpace::UserAsked; + fnorm = fvec.stableNorm(); + + /* initialize levenberg-marquardt parameter and iteration counter. */ + par = 0.; + iter = 1; + + return LevenbergMarquardtSpace::NotStarted; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeOneStep(FVectorType &x) +{ + using std::abs; + using std::sqrt; + + eigen_assert(x.size()==n); // check the caller is not cheating us + + /* calculate the jacobian matrix. */ + Index df_ret = functor.df(x, fjac); + if (df_ret<0) + return LevenbergMarquardtSpace::UserAsked; + if (df_ret>0) + // numerical diff, we evaluated the function df_ret times + nfev += df_ret; + else njev++; + + /* compute the qr factorization of the jacobian. */ + wa2 = fjac.colwise().blueNorm(); + ColPivHouseholderQR qrfac(fjac); + fjac = qrfac.matrixQR(); + permutation = qrfac.colsPermutation(); + + /* on the first iteration and if external scaling is not used, scale according */ + /* to the norms of the columns of the initial jacobian. */ + if (iter == 1) { + if (!useExternalScaling) + for (Index j = 0; j < n; ++j) + diag[j] = (wa2[j]==0.)? 1. : wa2[j]; + + /* on the first iteration, calculate the norm of the scaled x */ + /* and initialize the step bound delta. */ + xnorm = diag.cwiseProduct(x).stableNorm(); + delta = parameters.factor * xnorm; + if (delta == 0.) + delta = parameters.factor; + } + + /* form (q transpose)*fvec and store the first n components in */ + /* qtf. */ + wa4 = fvec; + wa4.applyOnTheLeft(qrfac.householderQ().adjoint()); + qtf = wa4.head(n); + + /* compute the norm of the scaled gradient. */ + gnorm = 0.; + if (fnorm != 0.) + for (Index j = 0; j < n; ++j) + if (wa2[permutation.indices()[j]] != 0.) + gnorm = (std::max)(gnorm, abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]])); + + /* test for convergence of the gradient norm. */ + if (gnorm <= parameters.gtol) + return LevenbergMarquardtSpace::CosinusTooSmall; + + /* rescale if necessary. */ + if (!useExternalScaling) + diag = diag.cwiseMax(wa2); + + do { + + /* determine the levenberg-marquardt parameter. */ + internal::lmpar2(qrfac, diag, qtf, delta, par, wa1); + + /* store the direction p and x + p. calculate the norm of p. */ + wa1 = -wa1; + wa2 = x + wa1; + pnorm = diag.cwiseProduct(wa1).stableNorm(); + + /* on the first iteration, adjust the initial step bound. */ + if (iter == 1) + delta = (std::min)(delta,pnorm); + + /* evaluate the function at x + p and calculate its norm. */ + if ( functor(wa2, wa4) < 0) + return LevenbergMarquardtSpace::UserAsked; + ++nfev; + fnorm1 = wa4.stableNorm(); + + /* compute the scaled actual reduction. */ + actred = -1.; + if (Scalar(.1) * fnorm1 < fnorm) + actred = 1. - numext::abs2(fnorm1 / fnorm); + + /* compute the scaled predicted reduction and */ + /* the scaled directional derivative. 
*/ + wa3 = fjac.template triangularView() * (qrfac.colsPermutation().inverse() *wa1); + temp1 = numext::abs2(wa3.stableNorm() / fnorm); + temp2 = numext::abs2(sqrt(par) * pnorm / fnorm); + prered = temp1 + temp2 / Scalar(.5); + dirder = -(temp1 + temp2); + + /* compute the ratio of the actual to the predicted */ + /* reduction. */ + ratio = 0.; + if (prered != 0.) + ratio = actred / prered; + + /* update the step bound. */ + if (ratio <= Scalar(.25)) { + if (actred >= 0.) + temp = Scalar(.5); + if (actred < 0.) + temp = Scalar(.5) * dirder / (dirder + Scalar(.5) * actred); + if (Scalar(.1) * fnorm1 >= fnorm || temp < Scalar(.1)) + temp = Scalar(.1); + /* Computing MIN */ + delta = temp * (std::min)(delta, pnorm / Scalar(.1)); + par /= temp; + } else if (!(par != 0. && ratio < Scalar(.75))) { + delta = pnorm / Scalar(.5); + par = Scalar(.5) * par; + } + + /* test for successful iteration. */ + if (ratio >= Scalar(1e-4)) { + /* successful iteration. update x, fvec, and their norms. */ + x = wa2; + wa2 = diag.cwiseProduct(x); + fvec = wa4; + xnorm = wa2.stableNorm(); + fnorm = fnorm1; + ++iter; + } + + /* tests for convergence. */ + if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1. && delta <= parameters.xtol * xnorm) + return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; + if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1.) + return LevenbergMarquardtSpace::RelativeReductionTooSmall; + if (delta <= parameters.xtol * xnorm) + return LevenbergMarquardtSpace::RelativeErrorTooSmall; + + /* tests for termination and stringent tolerances. */ + if (nfev >= parameters.maxfev) + return LevenbergMarquardtSpace::TooManyFunctionEvaluation; + if (abs(actred) <= NumTraits::epsilon() && prered <= NumTraits::epsilon() && Scalar(.5) * ratio <= 1.) + return LevenbergMarquardtSpace::FtolTooSmall; + if (delta <= NumTraits::epsilon() * xnorm) + return LevenbergMarquardtSpace::XtolTooSmall; + if (gnorm <= NumTraits::epsilon()) + return LevenbergMarquardtSpace::GtolTooSmall; + + } while (ratio < Scalar(1e-4)); + + return LevenbergMarquardtSpace::Running; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::lmstr1( + FVectorType &x, + const Scalar tol + ) +{ + n = x.size(); + m = functor.values(); + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || tol < 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + resetParameters(); + parameters.ftol = tol; + parameters.xtol = tol; + parameters.maxfev = 100*(n+1); + + return minimizeOptimumStorage(x); +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeOptimumStorageInit(FVectorType &x) +{ + n = x.size(); + m = functor.values(); + + wa1.resize(n); wa2.resize(n); wa3.resize(n); + wa4.resize(m); + fvec.resize(m); + // Only R is stored in fjac. Q is only used to compute 'qtf', which is + // Q.transpose()*rhs. qtf will be updated using givens rotation, + // instead of storing them in Q. + // The purpose it to only use a nxn matrix, instead of mxn here, so + // that we can handle cases where m>>n : + fjac.resize(n, n); + if (!useExternalScaling) + diag.resize(n); + eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); + qtf.resize(n); + + /* Function Body */ + nfev = 0; + njev = 0; + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. 
|| parameters.maxfev <= 0 || parameters.factor <= 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + if (useExternalScaling) + for (Index j = 0; j < n; ++j) + if (diag[j] <= 0.) + return LevenbergMarquardtSpace::ImproperInputParameters; + + /* evaluate the function at the starting point */ + /* and calculate its norm. */ + nfev = 1; + if ( functor(x, fvec) < 0) + return LevenbergMarquardtSpace::UserAsked; + fnorm = fvec.stableNorm(); + + /* initialize levenberg-marquardt parameter and iteration counter. */ + par = 0.; + iter = 1; + + return LevenbergMarquardtSpace::NotStarted; +} + + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeOptimumStorageOneStep(FVectorType &x) +{ + using std::abs; + using std::sqrt; + + eigen_assert(x.size()==n); // check the caller is not cheating us + + Index i, j; + bool sing; + + /* compute the qr factorization of the jacobian matrix */ + /* calculated one row at a time, while simultaneously */ + /* forming (q transpose)*fvec and storing the first */ + /* n components in qtf. */ + qtf.fill(0.); + fjac.fill(0.); + Index rownb = 2; + for (i = 0; i < m; ++i) { + if (functor.df(x, wa3, rownb) < 0) return LevenbergMarquardtSpace::UserAsked; + internal::rwupdt(fjac, wa3, qtf, fvec[i]); + ++rownb; + } + ++njev; + + /* if the jacobian is rank deficient, call qrfac to */ + /* reorder its columns and update the components of qtf. */ + sing = false; + for (j = 0; j < n; ++j) { + if (fjac(j,j) == 0.) + sing = true; + wa2[j] = fjac.col(j).head(j).stableNorm(); + } + permutation.setIdentity(n); + if (sing) { + wa2 = fjac.colwise().blueNorm(); + // TODO We have no unit test covering this code path, do not modify + // until it is carefully tested + ColPivHouseholderQR qrfac(fjac); + fjac = qrfac.matrixQR(); + wa1 = fjac.diagonal(); + fjac.diagonal() = qrfac.hCoeffs(); + permutation = qrfac.colsPermutation(); + // TODO : avoid this: + for(Index ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors + + for (j = 0; j < n; ++j) { + if (fjac(j,j) != 0.) { + sum = 0.; + for (i = j; i < n; ++i) + sum += fjac(i,j) * qtf[i]; + temp = -sum / fjac(j,j); + for (i = j; i < n; ++i) + qtf[i] += fjac(i,j) * temp; + } + fjac(j,j) = wa1[j]; + } + } + + /* on the first iteration and if external scaling is not used, scale according */ + /* to the norms of the columns of the initial jacobian. */ + if (iter == 1) { + if (!useExternalScaling) + for (j = 0; j < n; ++j) + diag[j] = (wa2[j]==0.)? 1. : wa2[j]; + + /* on the first iteration, calculate the norm of the scaled x */ + /* and initialize the step bound delta. */ + xnorm = diag.cwiseProduct(x).stableNorm(); + delta = parameters.factor * xnorm; + if (delta == 0.) + delta = parameters.factor; + } + + /* compute the norm of the scaled gradient. */ + gnorm = 0.; + if (fnorm != 0.) + for (j = 0; j < n; ++j) + if (wa2[permutation.indices()[j]] != 0.) + gnorm = (std::max)(gnorm, abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]])); + + /* test for convergence of the gradient norm. */ + if (gnorm <= parameters.gtol) + return LevenbergMarquardtSpace::CosinusTooSmall; + + /* rescale if necessary. */ + if (!useExternalScaling) + diag = diag.cwiseMax(wa2); + + do { + + /* determine the levenberg-marquardt parameter. */ + internal::lmpar(fjac, permutation.indices(), diag, qtf, delta, par, wa1); + + /* store the direction p and x + p. calculate the norm of p. 
*/ + wa1 = -wa1; + wa2 = x + wa1; + pnorm = diag.cwiseProduct(wa1).stableNorm(); + + /* on the first iteration, adjust the initial step bound. */ + if (iter == 1) + delta = (std::min)(delta,pnorm); + + /* evaluate the function at x + p and calculate its norm. */ + if ( functor(wa2, wa4) < 0) + return LevenbergMarquardtSpace::UserAsked; + ++nfev; + fnorm1 = wa4.stableNorm(); + + /* compute the scaled actual reduction. */ + actred = -1.; + if (Scalar(.1) * fnorm1 < fnorm) + actred = 1. - numext::abs2(fnorm1 / fnorm); + + /* compute the scaled predicted reduction and */ + /* the scaled directional derivative. */ + wa3 = fjac.topLeftCorner(n,n).template triangularView() * (permutation.inverse() * wa1); + temp1 = numext::abs2(wa3.stableNorm() / fnorm); + temp2 = numext::abs2(sqrt(par) * pnorm / fnorm); + prered = temp1 + temp2 / Scalar(.5); + dirder = -(temp1 + temp2); + + /* compute the ratio of the actual to the predicted */ + /* reduction. */ + ratio = 0.; + if (prered != 0.) + ratio = actred / prered; + + /* update the step bound. */ + if (ratio <= Scalar(.25)) { + if (actred >= 0.) + temp = Scalar(.5); + if (actred < 0.) + temp = Scalar(.5) * dirder / (dirder + Scalar(.5) * actred); + if (Scalar(.1) * fnorm1 >= fnorm || temp < Scalar(.1)) + temp = Scalar(.1); + /* Computing MIN */ + delta = temp * (std::min)(delta, pnorm / Scalar(.1)); + par /= temp; + } else if (!(par != 0. && ratio < Scalar(.75))) { + delta = pnorm / Scalar(.5); + par = Scalar(.5) * par; + } + + /* test for successful iteration. */ + if (ratio >= Scalar(1e-4)) { + /* successful iteration. update x, fvec, and their norms. */ + x = wa2; + wa2 = diag.cwiseProduct(x); + fvec = wa4; + xnorm = wa2.stableNorm(); + fnorm = fnorm1; + ++iter; + } + + /* tests for convergence. */ + if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1. && delta <= parameters.xtol * xnorm) + return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; + if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1.) + return LevenbergMarquardtSpace::RelativeReductionTooSmall; + if (delta <= parameters.xtol * xnorm) + return LevenbergMarquardtSpace::RelativeErrorTooSmall; + + /* tests for termination and stringent tolerances. */ + if (nfev >= parameters.maxfev) + return LevenbergMarquardtSpace::TooManyFunctionEvaluation; + if (abs(actred) <= NumTraits::epsilon() && prered <= NumTraits::epsilon() && Scalar(.5) * ratio <= 1.) + return LevenbergMarquardtSpace::FtolTooSmall; + if (delta <= NumTraits::epsilon() * xnorm) + return LevenbergMarquardtSpace::XtolTooSmall; + if (gnorm <= NumTraits::epsilon()) + return LevenbergMarquardtSpace::GtolTooSmall; + + } while (ratio < Scalar(1e-4)); + + return LevenbergMarquardtSpace::Running; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::minimizeOptimumStorage(FVectorType &x) +{ + LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) + return status; + do { + status = minimizeOptimumStorageOneStep(x); + } while (status==LevenbergMarquardtSpace::Running); + return status; +} + +template +LevenbergMarquardtSpace::Status +LevenbergMarquardt::lmdif1( + FunctorType &functor, + FVectorType &x, + Index *nfev, + const Scalar tol + ) +{ + Index n = x.size(); + Index m = functor.values(); + + /* check the input parameters for errors. */ + if (n <= 0 || m < n || tol < 0.) 
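+    // A minimal usage sketch (illustrative only; "MyResiduals" and its two-residual
+    // model are hypothetical). With an analytic Jacobian the functor provides
+    // operator()(x, fvec), df(x, fjac) and values() (the number m of residuals), and
+    // the solver minimizes the squared norm of fvec:
+    //
+    //   struct MyResiduals {
+    //     int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const {
+    //       fvec(0) = x(0) - 3.0;
+    //       fvec(1) = 10.0 * x(1);
+    //       return 0;
+    //     }
+    //     int df(const Eigen::VectorXd &, Eigen::MatrixXd &fjac) const {
+    //       fjac.setZero();
+    //       fjac(0, 0) = 1.0;
+    //       fjac(1, 1) = 10.0;
+    //       return 0;
+    //     }
+    //     int values() const { return 2; }
+    //   };
+    //
+    //   MyResiduals func;
+    //   Eigen::LevenbergMarquardt<MyResiduals, double> lm(func);
+    //   Eigen::VectorXd x(2);
+    //   x << 0.0, 1.0;
+    //   LevenbergMarquardtSpace::Status status = lm.lmder1(x);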
+ return LevenbergMarquardtSpace::ImproperInputParameters; + + NumericalDiff numDiff(functor); + // embedded LevenbergMarquardt + LevenbergMarquardt, Scalar > lm(numDiff); + lm.parameters.ftol = tol; + lm.parameters.xtol = tol; + lm.parameters.maxfev = 200*(n+1); + + LevenbergMarquardtSpace::Status info = LevenbergMarquardtSpace::Status(lm.minimize(x)); + if (nfev) + * nfev = lm.nfev; + return info; +} + +} // end namespace Eigen + +#endif // EIGEN_LEVENBERGMARQUARDT__H + +//vim: ai ts=4 sts=4 et sw=4 diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/chkder.h b/external/unsupported/Eigen/src/NonLinearOptimization/chkder.h new file mode 100644 index 0000000..db8ff7d --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/chkder.h @@ -0,0 +1,66 @@ +#define chkder_log10e 0.43429448190325182765 +#define chkder_factor 100. + +namespace Eigen { + +namespace internal { + +template +void chkder( + const Matrix< Scalar, Dynamic, 1 > &x, + const Matrix< Scalar, Dynamic, 1 > &fvec, + const Matrix< Scalar, Dynamic, Dynamic > &fjac, + Matrix< Scalar, Dynamic, 1 > &xp, + const Matrix< Scalar, Dynamic, 1 > &fvecp, + int mode, + Matrix< Scalar, Dynamic, 1 > &err + ) +{ + using std::sqrt; + using std::abs; + using std::log; + + typedef DenseIndex Index; + + const Scalar eps = sqrt(NumTraits::epsilon()); + const Scalar epsf = chkder_factor * NumTraits::epsilon(); + const Scalar epslog = chkder_log10e * log(eps); + Scalar temp; + + const Index m = fvec.size(), n = x.size(); + + if (mode != 2) { + /* mode = 1. */ + xp.resize(n); + for (Index j = 0; j < n; ++j) { + temp = eps * abs(x[j]); + if (temp == 0.) + temp = eps; + xp[j] = x[j] + temp; + } + } + else { + /* mode = 2. */ + err.setZero(m); + for (Index j = 0; j < n; ++j) { + temp = abs(x[j]); + if (temp == 0.) + temp = 1.; + err += temp * fjac.col(j); + } + for (Index i = 0; i < m; ++i) { + temp = 1.; + if (fvec[i] != 0. && fvecp[i] != 0. && abs(fvecp[i] - fvec[i]) >= epsf * abs(fvec[i])) + temp = eps * abs((fvecp[i] - fvec[i]) / eps - err[i]) / (abs(fvec[i]) + abs(fvecp[i])); + err[i] = 1.; + if (temp > NumTraits::epsilon() && temp < eps) + err[i] = (chkder_log10e * log(temp) - epslog) / epslog; + if (temp >= eps) + err[i] = 0.; + } + } +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/covar.h b/external/unsupported/Eigen/src/NonLinearOptimization/covar.h new file mode 100644 index 0000000..68260d1 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/covar.h @@ -0,0 +1,70 @@ +namespace Eigen { + +namespace internal { + +template +void covar( + Matrix< Scalar, Dynamic, Dynamic > &r, + const VectorXi &ipvt, + Scalar tol = std::sqrt(NumTraits::epsilon()) ) +{ + using std::abs; + typedef DenseIndex Index; + + /* Local variables */ + Index i, j, k, l, ii, jj; + bool sing; + Scalar temp; + + /* Function Body */ + const Index n = r.cols(); + const Scalar tolr = tol * abs(r(0,0)); + Matrix< Scalar, Dynamic, 1 > wa(n); + eigen_assert(ipvt.size()==n); + + /* form the inverse of r in the full upper triangle of r. */ + l = -1; + for (k = 0; k < n; ++k) + if (abs(r(k,k)) > tolr) { + r(k,k) = 1. / r(k,k); + for (j = 0; j <= k-1; ++j) { + temp = r(k,k) * r(j,k); + r(j,k) = 0.; + r.col(k).head(j+1) -= r.col(j).head(j+1) * temp; + } + l = k; + } + + /* form the full upper triangle of the inverse of (r transpose)*r */ + /* in the full upper triangle of r. 
*/ + for (k = 0; k <= l; ++k) { + for (j = 0; j <= k-1; ++j) + r.col(j).head(j+1) += r.col(k).head(j+1) * r(j,k); + r.col(k).head(k+1) *= r(k,k); + } + + /* form the full lower triangle of the covariance matrix */ + /* in the strict lower triangle of r and in wa. */ + for (j = 0; j < n; ++j) { + jj = ipvt[j]; + sing = j > l; + for (i = 0; i <= j; ++i) { + if (sing) + r(i,j) = 0.; + ii = ipvt[i]; + if (ii > jj) + r(ii,jj) = r(i,j); + if (ii < jj) + r(jj,ii) = r(i,j); + } + wa[jj] = r(j,j); + } + + /* symmetrize the covariance matrix in r. */ + r.topLeftCorner(n,n).template triangularView() = r.topLeftCorner(n,n).transpose(); + r.diagonal() = wa; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/dogleg.h b/external/unsupported/Eigen/src/NonLinearOptimization/dogleg.h new file mode 100644 index 0000000..80c5d27 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/dogleg.h @@ -0,0 +1,107 @@ +namespace Eigen { + +namespace internal { + +template +void dogleg( + const Matrix< Scalar, Dynamic, Dynamic > &qrfac, + const Matrix< Scalar, Dynamic, 1 > &diag, + const Matrix< Scalar, Dynamic, 1 > &qtb, + Scalar delta, + Matrix< Scalar, Dynamic, 1 > &x) +{ + using std::abs; + using std::sqrt; + + typedef DenseIndex Index; + + /* Local variables */ + Index i, j; + Scalar sum, temp, alpha, bnorm; + Scalar gnorm, qnorm; + Scalar sgnorm; + + /* Function Body */ + const Scalar epsmch = NumTraits::epsilon(); + const Index n = qrfac.cols(); + eigen_assert(n==qtb.size()); + eigen_assert(n==x.size()); + eigen_assert(n==diag.size()); + Matrix< Scalar, Dynamic, 1 > wa1(n), wa2(n); + + /* first, calculate the gauss-newton direction. */ + for (j = n-1; j >=0; --j) { + temp = qrfac(j,j); + if (temp == 0.) { + temp = epsmch * qrfac.col(j).head(j+1).maxCoeff(); + if (temp == 0.) + temp = epsmch; + } + if (j==n-1) + x[j] = qtb[j] / temp; + else + x[j] = (qtb[j] - qrfac.row(j).tail(n-j-1).dot(x.tail(n-j-1))) / temp; + } + + /* test whether the gauss-newton direction is acceptable. */ + qnorm = diag.cwiseProduct(x).stableNorm(); + if (qnorm <= delta) + return; + + // TODO : this path is not tested by Eigen unit tests + + /* the gauss-newton direction is not acceptable. */ + /* next, calculate the scaled gradient direction. */ + + wa1.fill(0.); + for (j = 0; j < n; ++j) { + wa1.tail(n-j) += qrfac.row(j).tail(n-j) * qtb[j]; + wa1[j] /= diag[j]; + } + + /* calculate the norm of the scaled gradient and test for */ + /* the special case in which the scaled gradient is zero. */ + gnorm = wa1.stableNorm(); + sgnorm = 0.; + alpha = delta / qnorm; + if (gnorm == 0.) + goto algo_end; + + /* calculate the point along the scaled gradient */ + /* at which the quadratic is minimized. */ + wa1.array() /= (diag*gnorm).array(); + // TODO : once unit tests cover this part,: + // wa2 = qrfac.template triangularView() * wa1; + for (j = 0; j < n; ++j) { + sum = 0.; + for (i = j; i < n; ++i) { + sum += qrfac(j,i) * wa1[i]; + } + wa2[j] = sum; + } + temp = wa2.stableNorm(); + sgnorm = gnorm / temp / temp; + + /* test whether the scaled gradient direction is acceptable. */ + alpha = 0.; + if (sgnorm >= delta) + goto algo_end; + + /* the scaled gradient direction is not acceptable. */ + /* finally, calculate the point along the dogleg */ + /* at which the quadratic is minimized. 
*/ + bnorm = qtb.stableNorm(); + temp = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta); + temp = temp - delta / qnorm * numext::abs2(sgnorm / delta) + sqrt(numext::abs2(temp - delta / qnorm) + (1.-numext::abs2(delta / qnorm)) * (1.-numext::abs2(sgnorm / delta))); + alpha = delta / qnorm * (1. - numext::abs2(sgnorm / delta)) / temp; +algo_end: + + /* form appropriate convex combination of the gauss-newton */ + /* direction and the scaled gradient direction. */ + temp = (1.-alpha) * (std::min)(sgnorm,delta); + x = temp * wa1 + alpha * x; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h b/external/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h new file mode 100644 index 0000000..bb7cf26 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h @@ -0,0 +1,79 @@ +namespace Eigen { + +namespace internal { + +template +DenseIndex fdjac1( + const FunctorType &Functor, + Matrix< Scalar, Dynamic, 1 > &x, + Matrix< Scalar, Dynamic, 1 > &fvec, + Matrix< Scalar, Dynamic, Dynamic > &fjac, + DenseIndex ml, DenseIndex mu, + Scalar epsfcn) +{ + using std::sqrt; + using std::abs; + + typedef DenseIndex Index; + + /* Local variables */ + Scalar h; + Index j, k; + Scalar eps, temp; + Index msum; + int iflag; + Index start, length; + + /* Function Body */ + const Scalar epsmch = NumTraits::epsilon(); + const Index n = x.size(); + eigen_assert(fvec.size()==n); + Matrix< Scalar, Dynamic, 1 > wa1(n); + Matrix< Scalar, Dynamic, 1 > wa2(n); + + eps = sqrt((std::max)(epsfcn,epsmch)); + msum = ml + mu + 1; + if (msum >= n) { + /* computation of dense approximate jacobian. */ + for (j = 0; j < n; ++j) { + temp = x[j]; + h = eps * abs(temp); + if (h == 0.) + h = eps; + x[j] = temp + h; + iflag = Functor(x, wa1); + if (iflag < 0) + return iflag; + x[j] = temp; + fjac.col(j) = (wa1-fvec)/h; + } + + }else { + /* computation of banded approximate jacobian. */ + for (k = 0; k < msum; ++k) { + for (j = k; (msum<0) ? (j>n): (jn): (j(0,j-mu); + length = (std::min)(n-1, j+ml) - start + 1; + fjac.col(j).segment(start, length) = ( wa1.segment(start, length)-fvec.segment(start, length))/h; + } + } + } + return 0; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/lmpar.h b/external/unsupported/Eigen/src/NonLinearOptimization/lmpar.h new file mode 100644 index 0000000..4c17d4c --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/lmpar.h @@ -0,0 +1,298 @@ +namespace Eigen { + +namespace internal { + +template +void lmpar( + Matrix< Scalar, Dynamic, Dynamic > &r, + const VectorXi &ipvt, + const Matrix< Scalar, Dynamic, 1 > &diag, + const Matrix< Scalar, Dynamic, 1 > &qtb, + Scalar delta, + Scalar &par, + Matrix< Scalar, Dynamic, 1 > &x) +{ + using std::abs; + using std::sqrt; + typedef DenseIndex Index; + + /* Local variables */ + Index i, j, l; + Scalar fp; + Scalar parc, parl; + Index iter; + Scalar temp, paru; + Scalar gnorm; + Scalar dxnorm; + + + /* Function Body */ + const Scalar dwarf = (std::numeric_limits::min)(); + const Index n = r.cols(); + eigen_assert(n==diag.size()); + eigen_assert(n==qtb.size()); + eigen_assert(n==x.size()); + + Matrix< Scalar, Dynamic, 1 > wa1, wa2; + + /* compute and store in x the gauss-newton direction. if the */ + /* jacobian is rank-deficient, obtain a least squares solution. */ + Index nsing = n-1; + wa1 = qtb; + for (j = 0; j < n; ++j) { + if (r(j,j) == 0. 
&& nsing == n-1) + nsing = j - 1; + if (nsing < n-1) + wa1[j] = 0.; + } + for (j = nsing; j>=0; --j) { + wa1[j] /= r(j,j); + temp = wa1[j]; + for (i = 0; i < j ; ++i) + wa1[i] -= r(i,j) * temp; + } + + for (j = 0; j < n; ++j) + x[ipvt[j]] = wa1[j]; + + /* initialize the iteration counter. */ + /* evaluate the function at the origin, and test */ + /* for acceptance of the gauss-newton direction. */ + iter = 0; + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + fp = dxnorm - delta; + if (fp <= Scalar(0.1) * delta) { + par = 0; + return; + } + + /* if the jacobian is not rank deficient, the newton */ + /* step provides a lower bound, parl, for the zero of */ + /* the function. otherwise set this bound to zero. */ + parl = 0.; + if (nsing >= n-1) { + for (j = 0; j < n; ++j) { + l = ipvt[j]; + wa1[j] = diag[l] * (wa2[l] / dxnorm); + } + // it's actually a triangularView.solveInplace(), though in a weird + // way: + for (j = 0; j < n; ++j) { + Scalar sum = 0.; + for (i = 0; i < j; ++i) + sum += r(i,j) * wa1[i]; + wa1[j] = (wa1[j] - sum) / r(j,j); + } + temp = wa1.blueNorm(); + parl = fp / delta / temp / temp; + } + + /* calculate an upper bound, paru, for the zero of the function. */ + for (j = 0; j < n; ++j) + wa1[j] = r.col(j).head(j+1).dot(qtb.head(j+1)) / diag[ipvt[j]]; + + gnorm = wa1.stableNorm(); + paru = gnorm / delta; + if (paru == 0.) + paru = dwarf / (std::min)(delta,Scalar(0.1)); + + /* if the input par lies outside of the interval (parl,paru), */ + /* set par to the closer endpoint. */ + par = (std::max)(par,parl); + par = (std::min)(par,paru); + if (par == 0.) + par = gnorm / dxnorm; + + /* beginning of an iteration. */ + while (true) { + ++iter; + + /* evaluate the function at the current value of par. */ + if (par == 0.) + par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ + wa1 = sqrt(par)* diag; + + Matrix< Scalar, Dynamic, 1 > sdiag(n); + qrsolv(r, ipvt, wa1, qtb, x, sdiag); + + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + temp = fp; + fp = dxnorm - delta; + + /* if the function is small enough, accept the current value */ + /* of par. also test for the exceptional cases where parl */ + /* is zero or the number of iterations has reached 10. */ + if (abs(fp) <= Scalar(0.1) * delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) + break; + + /* compute the newton correction. */ + for (j = 0; j < n; ++j) { + l = ipvt[j]; + wa1[j] = diag[l] * (wa2[l] / dxnorm); + } + for (j = 0; j < n; ++j) { + wa1[j] /= sdiag[j]; + temp = wa1[j]; + for (i = j+1; i < n; ++i) + wa1[i] -= r(i,j) * temp; + } + temp = wa1.blueNorm(); + parc = fp / delta / temp / temp; + + /* depending on the sign of the function, update parl or paru. */ + if (fp > 0.) + parl = (std::max)(parl,par); + if (fp < 0.) + paru = (std::min)(paru,par); + + /* compute an improved estimate for par. */ + /* Computing MAX */ + par = (std::max)(parl,par+parc); + + /* end of an iteration. */ + } + + /* termination. 
*/ + if (iter == 0) + par = 0.; + return; +} + +template +void lmpar2( + const ColPivHouseholderQR > &qr, + const Matrix< Scalar, Dynamic, 1 > &diag, + const Matrix< Scalar, Dynamic, 1 > &qtb, + Scalar delta, + Scalar &par, + Matrix< Scalar, Dynamic, 1 > &x) + +{ + using std::sqrt; + using std::abs; + typedef DenseIndex Index; + + /* Local variables */ + Index j; + Scalar fp; + Scalar parc, parl; + Index iter; + Scalar temp, paru; + Scalar gnorm; + Scalar dxnorm; + + + /* Function Body */ + const Scalar dwarf = (std::numeric_limits::min)(); + const Index n = qr.matrixQR().cols(); + eigen_assert(n==diag.size()); + eigen_assert(n==qtb.size()); + + Matrix< Scalar, Dynamic, 1 > wa1, wa2; + + /* compute and store in x the gauss-newton direction. if the */ + /* jacobian is rank-deficient, obtain a least squares solution. */ + +// const Index rank = qr.nonzeroPivots(); // exactly double(0.) + const Index rank = qr.rank(); // use a threshold + wa1 = qtb; + wa1.tail(n-rank).setZero(); + qr.matrixQR().topLeftCorner(rank, rank).template triangularView().solveInPlace(wa1.head(rank)); + + x = qr.colsPermutation()*wa1; + + /* initialize the iteration counter. */ + /* evaluate the function at the origin, and test */ + /* for acceptance of the gauss-newton direction. */ + iter = 0; + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + fp = dxnorm - delta; + if (fp <= Scalar(0.1) * delta) { + par = 0; + return; + } + + /* if the jacobian is not rank deficient, the newton */ + /* step provides a lower bound, parl, for the zero of */ + /* the function. otherwise set this bound to zero. */ + parl = 0.; + if (rank==n) { + wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2)/dxnorm; + qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView().solveInPlace(wa1); + temp = wa1.blueNorm(); + parl = fp / delta / temp / temp; + } + + /* calculate an upper bound, paru, for the zero of the function. */ + for (j = 0; j < n; ++j) + wa1[j] = qr.matrixQR().col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)]; + + gnorm = wa1.stableNorm(); + paru = gnorm / delta; + if (paru == 0.) + paru = dwarf / (std::min)(delta,Scalar(0.1)); + + /* if the input par lies outside of the interval (parl,paru), */ + /* set par to the closer endpoint. */ + par = (std::max)(par,parl); + par = (std::min)(par,paru); + if (par == 0.) + par = gnorm / dxnorm; + + /* beginning of an iteration. */ + Matrix< Scalar, Dynamic, Dynamic > s = qr.matrixQR(); + while (true) { + ++iter; + + /* evaluate the function at the current value of par. */ + if (par == 0.) + par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ + wa1 = sqrt(par)* diag; + + Matrix< Scalar, Dynamic, 1 > sdiag(n); + qrsolv(s, qr.colsPermutation().indices(), wa1, qtb, x, sdiag); + + wa2 = diag.cwiseProduct(x); + dxnorm = wa2.blueNorm(); + temp = fp; + fp = dxnorm - delta; + + /* if the function is small enough, accept the current value */ + /* of par. also test for the exceptional cases where parl */ + /* is zero or the number of iterations has reached 10. */ + if (abs(fp) <= Scalar(0.1) * delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) + break; + + /* compute the newton correction. 
*/ + wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2/dxnorm); + // we could almost use this here, but the diagonal is outside qr, in sdiag[] + // qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView().solveInPlace(wa1); + for (j = 0; j < n; ++j) { + wa1[j] /= sdiag[j]; + temp = wa1[j]; + for (Index i = j+1; i < n; ++i) + wa1[i] -= s(i,j) * temp; + } + temp = wa1.blueNorm(); + parc = fp / delta / temp / temp; + + /* depending on the sign of the function, update parl or paru. */ + if (fp > 0.) + parl = (std::max)(parl,par); + if (fp < 0.) + paru = (std::min)(paru,par); + + /* compute an improved estimate for par. */ + par = (std::max)(parl,par+parc); + } + if (iter == 0) + par = 0.; + return; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h b/external/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h new file mode 100644 index 0000000..4f2f560 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h @@ -0,0 +1,91 @@ +namespace Eigen { + +namespace internal { + +// TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt +template +void qrsolv( + Matrix< Scalar, Dynamic, Dynamic > &s, + // TODO : use a PermutationMatrix once lmpar is no more: + const VectorXi &ipvt, + const Matrix< Scalar, Dynamic, 1 > &diag, + const Matrix< Scalar, Dynamic, 1 > &qtb, + Matrix< Scalar, Dynamic, 1 > &x, + Matrix< Scalar, Dynamic, 1 > &sdiag) + +{ + typedef DenseIndex Index; + + /* Local variables */ + Index i, j, k, l; + Scalar temp; + Index n = s.cols(); + Matrix< Scalar, Dynamic, 1 > wa(n); + JacobiRotation givens; + + /* Function Body */ + // the following will only change the lower triangular part of s, including + // the diagonal, though the diagonal is restored afterward + + /* copy r and (q transpose)*b to preserve input and initialize s. */ + /* in particular, save the diagonal elements of r in x. */ + x = s.diagonal(); + wa = qtb; + + s.topLeftCorner(n,n).template triangularView() = s.topLeftCorner(n,n).transpose(); + + /* eliminate the diagonal matrix d using a givens rotation. */ + for (j = 0; j < n; ++j) { + + /* prepare the row of d to be eliminated, locating the */ + /* diagonal element using p from the qr factorization. */ + l = ipvt[j]; + if (diag[l] == 0.) + break; + sdiag.tail(n-j).setZero(); + sdiag[j] = diag[l]; + + /* the transformations to eliminate the row of d */ + /* modify only a single element of (q transpose)*b */ + /* beyond the first n, which is initially zero. */ + Scalar qtbpj = 0.; + for (k = j; k < n; ++k) { + /* determine a givens rotation which eliminates the */ + /* appropriate element in the current row of d. */ + givens.makeGivens(-s(k,k), sdiag[k]); + + /* compute the modified diagonal element of r and */ + /* the modified element of ((q transpose)*b,0). */ + s(k,k) = givens.c() * s(k,k) + givens.s() * sdiag[k]; + temp = givens.c() * wa[k] + givens.s() * qtbpj; + qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; + wa[k] = temp; + + /* accumulate the transformation in the row of s. */ + for (i = k+1; i().solveInPlace(wa.head(nsing)); + + // restore + sdiag = s.diagonal(); + s.diagonal() = x; + + /* permute the components of z back to components of x. 
*/ + for (j = 0; j < n; ++j) x[ipvt[j]] = wa[j]; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h b/external/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h new file mode 100644 index 0000000..36ff700 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h @@ -0,0 +1,30 @@ +namespace Eigen { + +namespace internal { + +// TODO : move this to GivensQR once there's such a thing in Eigen + +template +void r1mpyq(DenseIndex m, DenseIndex n, Scalar *a, const std::vector > &v_givens, const std::vector > &w_givens) +{ + typedef DenseIndex Index; + + /* apply the first set of givens rotations to a. */ + for (Index j = n-2; j>=0; --j) + for (Index i = 0; i +void r1updt( + Matrix< Scalar, Dynamic, Dynamic > &s, + const Matrix< Scalar, Dynamic, 1> &u, + std::vector > &v_givens, + std::vector > &w_givens, + Matrix< Scalar, Dynamic, 1> &v, + Matrix< Scalar, Dynamic, 1> &w, + bool *sing) +{ + typedef DenseIndex Index; + const JacobiRotation IdentityRotation = JacobiRotation(1,0); + + /* Local variables */ + const Index m = s.rows(); + const Index n = s.cols(); + Index i, j=1; + Scalar temp; + JacobiRotation givens; + + // r1updt had a broader usecase, but we don't use it here. And, more + // importantly, we can not test it. + eigen_assert(m==n); + eigen_assert(u.size()==m); + eigen_assert(v.size()==n); + eigen_assert(w.size()==n); + + /* move the nontrivial part of the last column of s into w. */ + w[n-1] = s(n-1,n-1); + + /* rotate the vector v into a multiple of the n-th unit vector */ + /* in such a way that a spike is introduced into w. */ + for (j=n-2; j>=0; --j) { + w[j] = 0.; + if (v[j] != 0.) { + /* determine a givens rotation which eliminates the */ + /* j-th element of v. */ + givens.makeGivens(-v[n-1], v[j]); + + /* apply the transformation to v and store the information */ + /* necessary to recover the givens rotation. */ + v[n-1] = givens.s() * v[j] + givens.c() * v[n-1]; + v_givens[j] = givens; + + /* apply the transformation to s and extend the spike in w. */ + for (i = j; i < m; ++i) { + temp = givens.c() * s(j,i) - givens.s() * w[i]; + w[i] = givens.s() * s(j,i) + givens.c() * w[i]; + s(j,i) = temp; + } + } else + v_givens[j] = IdentityRotation; + } + + /* add the spike from the rank 1 update to w. */ + w += v[n-1] * u; + + /* eliminate the spike. */ + *sing = false; + for (j = 0; j < n-1; ++j) { + if (w[j] != 0.) { + /* determine a givens rotation which eliminates the */ + /* j-th element of the spike. */ + givens.makeGivens(-s(j,j), w[j]); + + /* apply the transformation to s and reduce the spike in w. */ + for (i = j; i < m; ++i) { + temp = givens.c() * s(j,i) + givens.s() * w[i]; + w[i] = -givens.s() * s(j,i) + givens.c() * w[i]; + s(j,i) = temp; + } + + /* store the information necessary to recover the */ + /* givens rotation. */ + w_givens[j] = givens; + } else + v_givens[j] = IdentityRotation; + + /* test for zero diagonal elements in the output s. */ + if (s(j,j) == 0.) { + *sing = true; + } + } + /* move w back into the last column of the output s. */ + s(n-1,n-1) = w[n-1]; + + if (s(j,j) == 0.) 
{ + *sing = true; + } + return; +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h b/external/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h new file mode 100644 index 0000000..6ebf856 --- /dev/null +++ b/external/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h @@ -0,0 +1,49 @@ +namespace Eigen { + +namespace internal { + +template +void rwupdt( + Matrix< Scalar, Dynamic, Dynamic > &r, + const Matrix< Scalar, Dynamic, 1> &w, + Matrix< Scalar, Dynamic, 1> &b, + Scalar alpha) +{ + typedef DenseIndex Index; + + const Index n = r.cols(); + eigen_assert(r.rows()>=n); + std::vector > givens(n); + + /* Local variables */ + Scalar temp, rowj; + + /* Function Body */ + for (Index j = 0; j < n; ++j) { + rowj = w[j]; + + /* apply the previous transformations to */ + /* r(i,j), i=0,1,...,j-1, and to w(j). */ + for (Index i = 0; i < j; ++i) { + temp = givens[i].c() * r(i,j) + givens[i].s() * rowj; + rowj = -givens[i].s() * r(i,j) + givens[i].c() * rowj; + r(i,j) = temp; + } + + /* determine a givens rotation which eliminates w(j). */ + givens[j].makeGivens(-r(j,j), rowj); + + if (rowj == 0.) + continue; // givens[j] is identity + + /* apply the current transformation to r(j,j), b(j), and alpha. */ + r(j,j) = givens[j].c() * r(j,j) + givens[j].s() * rowj; + temp = givens[j].c() * b[j] + givens[j].s() * alpha; + alpha = -givens[j].s() * b[j] + givens[j].c() * alpha; + b[j] = temp; + } +} + +} // end namespace internal + +} // end namespace Eigen diff --git a/external/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h b/external/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h new file mode 100644 index 0000000..ea5d8bc --- /dev/null +++ b/external/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h @@ -0,0 +1,130 @@ +// -*- coding: utf-8 +// vim: set fileencoding=utf-8 + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Thomas Capricelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMERICAL_DIFF_H +#define EIGEN_NUMERICAL_DIFF_H + +namespace Eigen { + +enum NumericalDiffMode { + Forward, + Central +}; + + +/** + * This class allows you to add a method df() to your functor, which will + * use numerical differentiation to compute an approximate of the + * derivative for the functor. Of course, if you have an analytical form + * for the derivative, you should rather implement df() by yourself. + * + * More information on + * http://en.wikipedia.org/wiki/Numerical_differentiation + * + * Currently only "Forward" and "Central" scheme are implemented. + */ +template +class NumericalDiff : public _Functor +{ +public: + typedef _Functor Functor; + typedef typename Functor::Scalar Scalar; + typedef typename Functor::InputType InputType; + typedef typename Functor::ValueType ValueType; + typedef typename Functor::JacobianType JacobianType; + + NumericalDiff(Scalar _epsfcn=0.) : Functor(), epsfcn(_epsfcn) {} + NumericalDiff(const Functor& f, Scalar _epsfcn=0.) 
: Functor(f), epsfcn(_epsfcn) {} + + // forward constructors + template + NumericalDiff(const T0& a0) : Functor(a0), epsfcn(0) {} + template + NumericalDiff(const T0& a0, const T1& a1) : Functor(a0, a1), epsfcn(0) {} + template + NumericalDiff(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2), epsfcn(0) {} + + enum { + InputsAtCompileTime = Functor::InputsAtCompileTime, + ValuesAtCompileTime = Functor::ValuesAtCompileTime + }; + + /** + * return the number of evaluation of functor + */ + int df(const InputType& _x, JacobianType &jac) const + { + using std::sqrt; + using std::abs; + /* Local variables */ + Scalar h; + int nfev=0; + const typename InputType::Index n = _x.size(); + const Scalar eps = sqrt(((std::max)(epsfcn,NumTraits::epsilon() ))); + ValueType val1, val2; + InputType x = _x; + // TODO : we should do this only if the size is not already known + val1.resize(Functor::values()); + val2.resize(Functor::values()); + + // initialization + switch(mode) { + case Forward: + // compute f(x) + Functor::operator()(x, val1); nfev++; + break; + case Central: + // do nothing + break; + default: + eigen_assert(false); + }; + + // Function Body + for (int j = 0; j < n; ++j) { + h = eps * abs(x[j]); + if (h == 0.) { + h = eps; + } + switch(mode) { + case Forward: + x[j] += h; + Functor::operator()(x, val2); + nfev++; + x[j] = _x[j]; + jac.col(j) = (val2-val1)/h; + break; + case Central: + x[j] += h; + Functor::operator()(x, val2); nfev++; + x[j] -= 2*h; + Functor::operator()(x, val1); nfev++; + x[j] = _x[j]; + jac.col(j) = (val2-val1)/(2*h); + break; + default: + eigen_assert(false); + }; + } + return nfev; + } +private: + Scalar epsfcn; + + NumericalDiff& operator=(const NumericalDiff&); +}; + +} // end namespace Eigen + +//vim: ai ts=4 sts=4 et sw=4 +#endif // EIGEN_NUMERICAL_DIFF_H + diff --git a/external/unsupported/Eigen/src/Polynomials/Companion.h b/external/unsupported/Eigen/src/Polynomials/Companion.h new file mode 100644 index 0000000..59a15b0 --- /dev/null +++ b/external/unsupported/Eigen/src/Polynomials/Companion.h @@ -0,0 +1,280 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Manuel Yguel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPANION_H +#define EIGEN_COMPANION_H + +// This file requires the user to include +// * Eigen/Core +// * Eigen/src/PolynomialSolver.h + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +template +struct decrement_if_fixed_size +{ + enum { + ret = (Size == Dynamic) ? Dynamic : Size-1 }; +}; + +#endif + +template< typename _Scalar, int _Deg > +class companion +{ + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? 
Dynamic : _Deg) + + enum { + Deg = _Deg, + Deg_1=decrement_if_fixed_size::ret + }; + + typedef _Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix RightColumn; + //typedef DiagonalMatrix< Scalar, Deg_1, Deg_1 > BottomLeftDiagonal; + typedef Matrix BottomLeftDiagonal; + + typedef Matrix DenseCompanionMatrixType; + typedef Matrix< Scalar, _Deg, Deg_1 > LeftBlock; + typedef Matrix< Scalar, Deg_1, Deg_1 > BottomLeftBlock; + typedef Matrix< Scalar, 1, Deg_1 > LeftBlockFirstRow; + + typedef DenseIndex Index; + + public: + EIGEN_STRONG_INLINE const _Scalar operator()(Index row, Index col ) const + { + if( m_bl_diag.rows() > col ) + { + if( 0 < row ){ return m_bl_diag[col]; } + else{ return 0; } + } + else{ return m_monic[row]; } + } + + public: + template + void setPolynomial( const VectorType& poly ) + { + const Index deg = poly.size()-1; + m_monic = -poly.head(deg)/poly[deg]; + m_bl_diag.setOnes(deg-1); + } + + template + companion( const VectorType& poly ){ + setPolynomial( poly ); } + + public: + DenseCompanionMatrixType denseMatrix() const + { + const Index deg = m_monic.size(); + const Index deg_1 = deg-1; + DenseCompanionMatrixType companMat(deg,deg); + companMat << + ( LeftBlock(deg,deg_1) + << LeftBlockFirstRow::Zero(1,deg_1), + BottomLeftBlock::Identity(deg-1,deg-1)*m_bl_diag.asDiagonal() ).finished() + , m_monic; + return companMat; + } + + + + protected: + /** Helper function for the balancing algorithm. + * \returns true if the row and the column, having colNorm and rowNorm + * as norms, are balanced, false otherwise. + * colB and rowB are respectively the multipliers for + * the column and the row in order to balance them. + * */ + bool balanced( RealScalar colNorm, RealScalar rowNorm, + bool& isBalanced, RealScalar& colB, RealScalar& rowB ); + + /** Helper function for the balancing algorithm. + * \returns true if the row and the column, having colNorm and rowNorm + * as norms, are balanced, false otherwise. + * colB and rowB are respectively the multipliers for + * the column and the row in order to balance them. + * */ + bool balancedR( RealScalar colNorm, RealScalar rowNorm, + bool& isBalanced, RealScalar& colB, RealScalar& rowB ); + + public: + /** + * Balancing algorithm from B. N. PARLETT and C. REINSCH (1969) + * "Balancing a matrix for calculation of eigenvalues and eigenvectors" + * adapted to the case of companion matrices. + * A matrix with non zero row and non zero column is balanced + * for a certain norm if the i-th row and the i-th column + * have same norm for all i. + */ + void balance(); + + protected: + RightColumn m_monic; + BottomLeftDiagonal m_bl_diag; +}; + + + +template< typename _Scalar, int _Deg > +inline +bool companion<_Scalar,_Deg>::balanced( RealScalar colNorm, RealScalar rowNorm, + bool& isBalanced, RealScalar& colB, RealScalar& rowB ) +{ + if( RealScalar(0) == colNorm || RealScalar(0) == rowNorm + || !(numext::isfinite)(colNorm) || !(numext::isfinite)(rowNorm)){ + return true; + } + else + { + //To find the balancing coefficients, if the radix is 2, + //one finds \f$ \sigma \f$ such that + // \f$ 2^{2\sigma-1} < rowNorm / colNorm \le 2^{2\sigma+1} \f$ + // then the balancing coefficient for the row is \f$ 1/2^{\sigma} \f$ + // and the balancing coefficient for the column is \f$ 2^{\sigma} \f$ + const RealScalar radix = RealScalar(2); + const RealScalar radix2 = RealScalar(4); + + rowB = rowNorm / radix; + colB = RealScalar(1); + const RealScalar s = colNorm + rowNorm; + + // Find sigma s.t. 
rowNorm / 2 <= 2^(2*sigma) * colNorm + RealScalar scout = colNorm; + while (scout < rowB) + { + colB *= radix; + scout *= radix2; + } + + // We now have an upper-bound for sigma, try to lower it. + // Find sigma s.t. 2^(2*sigma) * colNorm / 2 < rowNorm + scout = colNorm * (colB / radix) * colB; // Avoid overflow. + while (scout >= rowNorm) + { + colB /= radix; + scout /= radix2; + } + + // This line is used to avoid insubstantial balancing. + if ((rowNorm + radix * scout) < RealScalar(0.95) * s * colB) + { + isBalanced = false; + rowB = RealScalar(1) / colB; + return false; + } + else + { + return true; + } + } +} + +template< typename _Scalar, int _Deg > +inline +bool companion<_Scalar,_Deg>::balancedR( RealScalar colNorm, RealScalar rowNorm, + bool& isBalanced, RealScalar& colB, RealScalar& rowB ) +{ + if( RealScalar(0) == colNorm || RealScalar(0) == rowNorm ){ return true; } + else + { + /** + * Set the norm of the column and the row to the geometric mean + * of the row and column norm + */ + const RealScalar q = colNorm/rowNorm; + if( !isApprox( q, _Scalar(1) ) ) + { + rowB = sqrt( colNorm/rowNorm ); + colB = RealScalar(1)/rowB; + + isBalanced = false; + return false; + } + else{ + return true; } + } +} + + +template< typename _Scalar, int _Deg > +void companion<_Scalar,_Deg>::balance() +{ + using std::abs; + EIGEN_STATIC_ASSERT( Deg == Dynamic || 1 < Deg, YOU_MADE_A_PROGRAMMING_MISTAKE ); + const Index deg = m_monic.size(); + const Index deg_1 = deg-1; + + bool hasConverged=false; + while( !hasConverged ) + { + hasConverged = true; + RealScalar colNorm,rowNorm; + RealScalar colB,rowB; + + //First row, first column excluding the diagonal + //============================================== + colNorm = abs(m_bl_diag[0]); + rowNorm = abs(m_monic[0]); + + //Compute balancing of the row and the column + if( !balanced( colNorm, rowNorm, hasConverged, colB, rowB ) ) + { + m_bl_diag[0] *= colB; + m_monic[0] *= rowB; + } + + //Middle rows and columns excluding the diagonal + //============================================== + for( Index i=1; i headMonic( m_monic, 0, deg_1 ); + colNorm = headMonic.array().abs().sum(); + rowNorm = abs( m_bl_diag[ebl] ); + + //Compute balancing of the row and the column + if( !balanced( colNorm, rowNorm, hasConverged, colB, rowB ) ) + { + headMonic *= colB; + m_bl_diag[ebl] *= rowB; + } + } +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPANION_H diff --git a/external/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/external/unsupported/Eigen/src/Polynomials/PolynomialSolver.h new file mode 100644 index 0000000..5e0ecbb --- /dev/null +++ b/external/unsupported/Eigen/src/Polynomials/PolynomialSolver.h @@ -0,0 +1,428 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Manuel Yguel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_POLYNOMIAL_SOLVER_H +#define EIGEN_POLYNOMIAL_SOLVER_H + +namespace Eigen { + +/** \ingroup Polynomials_Module + * \class PolynomialSolverBase. + * + * \brief Defined to be inherited by polynomial solvers: it provides + * convenient methods such as + * - real roots, + * - greatest, smallest complex roots, + * - real roots with greatest, smallest absolute real value, + * - greatest, smallest real roots. + * + * It stores the set of roots as a vector of complexes. 
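+ *
+ * A minimal usage sketch (illustrative only; it relies on the concrete
+ * PolynomialSolver declared further down in this header, with coefficients
+ * stored lowest degree first, so the vector below encodes 1 - 3x + 2x^2):
+ * \code
+ * Eigen::Vector3d coeffs(1.0, -3.0, 2.0);             // 2x^2 - 3x + 1
+ * Eigen::PolynomialSolver<double, 2> solver(coeffs);  // roots via the companion matrix
+ * std::vector<double> reals;
+ * solver.realRoots(reals);                            // contains 0.5 and 1.0
+ * solver.roots();                                     // all roots, as complexes
+ * \endcode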
+ * + */ +template< typename _Scalar, int _Deg > +class PolynomialSolverBase +{ + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? Dynamic : _Deg) + + typedef _Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef std::complex RootType; + typedef Matrix RootsType; + + typedef DenseIndex Index; + + protected: + template< typename OtherPolynomial > + inline void setPolynomial( const OtherPolynomial& poly ){ + m_roots.resize(poly.size()-1); } + + public: + template< typename OtherPolynomial > + inline PolynomialSolverBase( const OtherPolynomial& poly ){ + setPolynomial( poly() ); } + + inline PolynomialSolverBase(){} + + public: + /** \returns the complex roots of the polynomial */ + inline const RootsType& roots() const { return m_roots; } + + public: + /** Clear and fills the back insertion sequence with the real roots of the polynomial + * i.e. the real part of the complex roots that have an imaginary part which + * absolute value is smaller than absImaginaryThreshold. + * absImaginaryThreshold takes the dummy_precision associated + * with the _Scalar template parameter of the PolynomialSolver class as the default value. + * + * \param[out] bi_seq : the back insertion sequence (stl concept) + * \param[in] absImaginaryThreshold : the maximum bound of the imaginary part of a complex + * number that is considered as real. + * */ + template + inline void realRoots( Stl_back_insertion_sequence& bi_seq, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + using std::abs; + bi_seq.clear(); + for(Index i=0; i + inline const RootType& selectComplexRoot_withRespectToNorm( squaredNormBinaryPredicate& pred ) const + { + Index res=0; + RealScalar norm2 = numext::abs2( m_roots[0] ); + for( Index i=1; i greater; + return selectComplexRoot_withRespectToNorm( greater ); + } + + /** + * \returns the complex root with smallest norm. + */ + inline const RootType& smallestRoot() const + { + std::less less; + return selectComplexRoot_withRespectToNorm( less ); + } + + protected: + template + inline const RealScalar& selectRealRoot_withRespectToAbsRealPart( + squaredRealPartBinaryPredicate& pred, + bool& hasArealRoot, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + using std::abs; + hasArealRoot = false; + Index res=0; + RealScalar abs2(0); + + for( Index i=0; i + inline const RealScalar& selectRealRoot_withRespectToRealPart( + RealPartBinaryPredicate& pred, + bool& hasArealRoot, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + using std::abs; + hasArealRoot = false; + Index res=0; + RealScalar val(0); + + for( Index i=0; i::dummy_precision() ) const + { + std::greater greater; + return selectRealRoot_withRespectToAbsRealPart( greater, hasArealRoot, absImaginaryThreshold ); + } + + + /** + * \returns a real root with smallest absolute magnitude. + * A real root is defined as the real part of a complex root with absolute imaginary + * part smallest than absImaginaryThreshold. + * absImaginaryThreshold takes the dummy_precision associated + * with the _Scalar template parameter of the PolynomialSolver class as the default value. + * If no real root is found the boolean hasArealRoot is set to false and the real part of + * the root with smallest absolute imaginary part is returned instead. + * + * \param[out] hasArealRoot : boolean true if a real root is found according to the + * absImaginaryThreshold criterion, false otherwise. 
+ * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide + * whether or not a root is real. + */ + inline const RealScalar& absSmallestRealRoot( + bool& hasArealRoot, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + std::less less; + return selectRealRoot_withRespectToAbsRealPart( less, hasArealRoot, absImaginaryThreshold ); + } + + + /** + * \returns the real root with greatest value. + * A real root is defined as the real part of a complex root with absolute imaginary + * part smallest than absImaginaryThreshold. + * absImaginaryThreshold takes the dummy_precision associated + * with the _Scalar template parameter of the PolynomialSolver class as the default value. + * If no real root is found the boolean hasArealRoot is set to false and the real part of + * the root with smallest absolute imaginary part is returned instead. + * + * \param[out] hasArealRoot : boolean true if a real root is found according to the + * absImaginaryThreshold criterion, false otherwise. + * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide + * whether or not a root is real. + */ + inline const RealScalar& greatestRealRoot( + bool& hasArealRoot, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + std::greater greater; + return selectRealRoot_withRespectToRealPart( greater, hasArealRoot, absImaginaryThreshold ); + } + + + /** + * \returns the real root with smallest value. + * A real root is defined as the real part of a complex root with absolute imaginary + * part smallest than absImaginaryThreshold. + * absImaginaryThreshold takes the dummy_precision associated + * with the _Scalar template parameter of the PolynomialSolver class as the default value. + * If no real root is found the boolean hasArealRoot is set to false and the real part of + * the root with smallest absolute imaginary part is returned instead. + * + * \param[out] hasArealRoot : boolean true if a real root is found according to the + * absImaginaryThreshold criterion, false otherwise. + * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide + * whether or not a root is real. + */ + inline const RealScalar& smallestRealRoot( + bool& hasArealRoot, + const RealScalar& absImaginaryThreshold = NumTraits::dummy_precision() ) const + { + std::less less; + return selectRealRoot_withRespectToRealPart( less, hasArealRoot, absImaginaryThreshold ); + } + + protected: + RootsType m_roots; +}; + +#define EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( BASE ) \ + typedef typename BASE::Scalar Scalar; \ + typedef typename BASE::RealScalar RealScalar; \ + typedef typename BASE::RootType RootType; \ + typedef typename BASE::RootsType RootsType; + + + +/** \ingroup Polynomials_Module + * + * \class PolynomialSolver + * + * \brief A polynomial solver + * + * Computes the complex roots of a real polynomial. + * + * \param _Scalar the scalar type, i.e., the type of the polynomial coefficients + * \param _Deg the degree of the polynomial, can be a compile time value or Dynamic. + * Notice that the number of polynomial coefficients is _Deg+1. + * + * This class implements a polynomial solver and provides convenient methods such as + * - real roots, + * - greatest, smallest complex roots, + * - real roots with greatest, smallest absolute real value. + * - greatest, smallest real roots. + * + * WARNING: this polynomial solver is experimental, part of the unsupported Eigen modules. 
+ * + * + * Currently a QR algorithm is used to compute the eigenvalues of the companion matrix of + * the polynomial to compute its roots. + * This supposes that the complex moduli of the roots are all distinct: e.g. there should + * be no multiple roots or conjugate roots for instance. + * With 32bit (float) floating types this problem shows up frequently. + * However, almost always, correct accuracy is reached even in these cases for 64bit + * (double) floating types and small polynomial degree (<20). + */ +template +class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg> +{ + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? Dynamic : _Deg) + + typedef PolynomialSolverBase<_Scalar,_Deg> PS_Base; + EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( PS_Base ) + + typedef Matrix CompanionMatrixType; + typedef typename internal::conditional::IsComplex, + ComplexEigenSolver, + EigenSolver >::type EigenSolverType; + typedef typename internal::conditional::IsComplex, Scalar, std::complex >::type ComplexScalar; + + public: + /** Computes the complex roots of a new polynomial. */ + template< typename OtherPolynomial > + void compute( const OtherPolynomial& poly ) + { + eigen_assert( Scalar(0) != poly[poly.size()-1] ); + eigen_assert( poly.size() > 1 ); + if(poly.size() > 2 ) + { + internal::companion companion( poly ); + companion.balance(); + m_eigenSolver.compute( companion.denseMatrix() ); + m_roots = m_eigenSolver.eigenvalues(); + // cleanup noise in imaginary part of real roots: + // if the imaginary part is rather small compared to the real part + // and that cancelling the imaginary part yield a smaller evaluation, + // then it's safe to keep the real part only. + RealScalar coarse_prec = RealScalar(std::pow(4,poly.size()+1))*NumTraits::epsilon(); + for(Index i = 0; i + inline PolynomialSolver( const OtherPolynomial& poly ){ + compute( poly ); } + + inline PolynomialSolver(){} + + protected: + using PS_Base::m_roots; + EigenSolverType m_eigenSolver; +}; + + +template< typename _Scalar > +class PolynomialSolver<_Scalar,1> : public PolynomialSolverBase<_Scalar,1> +{ + public: + typedef PolynomialSolverBase<_Scalar,1> PS_Base; + EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( PS_Base ) + + public: + /** Computes the complex roots of a new polynomial. */ + template< typename OtherPolynomial > + void compute( const OtherPolynomial& poly ) + { + eigen_assert( poly.size() == 2 ); + eigen_assert( Scalar(0) != poly[1] ); + m_roots[0] = -poly[0]/poly[1]; + } + + public: + template< typename OtherPolynomial > + inline PolynomialSolver( const OtherPolynomial& poly ){ + compute( poly ); } + + inline PolynomialSolver(){} + + protected: + using PS_Base::m_roots; +}; + +} // end namespace Eigen + +#endif // EIGEN_POLYNOMIAL_SOLVER_H diff --git a/external/unsupported/Eigen/src/Polynomials/PolynomialUtils.h b/external/unsupported/Eigen/src/Polynomials/PolynomialUtils.h new file mode 100644 index 0000000..394e857 --- /dev/null +++ b/external/unsupported/Eigen/src/Polynomials/PolynomialUtils.h @@ -0,0 +1,143 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Manuel Yguel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_POLYNOMIAL_UTILS_H +#define EIGEN_POLYNOMIAL_UTILS_H + +namespace Eigen { + +/** \ingroup Polynomials_Module + * \returns the evaluation of the polynomial at x using Horner algorithm. + * + * \param[in] poly : the vector of coefficients of the polynomial ordered + * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial + * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. + * \param[in] x : the value to evaluate the polynomial at. + * + * \note for stability: + * \f$ |x| \le 1 \f$ + */ +template +inline +T poly_eval_horner( const Polynomials& poly, const T& x ) +{ + T val=poly[poly.size()-1]; + for(DenseIndex i=poly.size()-2; i>=0; --i ){ + val = val*x + poly[i]; } + return val; +} + +/** \ingroup Polynomials_Module + * \returns the evaluation of the polynomial at x using stabilized Horner algorithm. + * + * \param[in] poly : the vector of coefficients of the polynomial ordered + * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial + * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. + * \param[in] x : the value to evaluate the polynomial at. + */ +template +inline +T poly_eval( const Polynomials& poly, const T& x ) +{ + typedef typename NumTraits::Real Real; + + if( numext::abs2( x ) <= Real(1) ){ + return poly_eval_horner( poly, x ); } + else + { + T val=poly[0]; + T inv_x = T(1)/x; + for( DenseIndex i=1; i +inline +typename NumTraits::Real cauchy_max_bound( const Polynomial& poly ) +{ + using std::abs; + typedef typename Polynomial::Scalar Scalar; + typedef typename NumTraits::Real Real; + + eigen_assert( Scalar(0) != poly[poly.size()-1] ); + const Scalar inv_leading_coeff = Scalar(1)/poly[poly.size()-1]; + Real cb(0); + + for( DenseIndex i=0; i +inline +typename NumTraits::Real cauchy_min_bound( const Polynomial& poly ) +{ + using std::abs; + typedef typename Polynomial::Scalar Scalar; + typedef typename NumTraits::Real Real; + + DenseIndex i=0; + while( i +void roots_to_monicPolynomial( const RootVector& rv, Polynomial& poly ) +{ + + typedef typename Polynomial::Scalar Scalar; + + poly.setZero( rv.size()+1 ); + poly[0] = -rv[0]; poly[1] = Scalar(1); + for( DenseIndex i=1; i< rv.size(); ++i ) + { + for( DenseIndex j=i+1; j>0; --j ){ poly[j] = poly[j-1] - rv[i]*poly[j]; } + poly[0] = -rv[i]*poly[0]; + } +} + +} // end namespace Eigen + +#endif // EIGEN_POLYNOMIAL_UTILS_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h b/external/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h new file mode 100644 index 0000000..6d0370d --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h @@ -0,0 +1,352 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SKYLINEINPLACELU_H +#define EIGEN_SKYLINEINPLACELU_H + +namespace Eigen { + +/** \ingroup Skyline_Module + * + * \class SkylineInplaceLU + * + * \brief Inplace LU decomposition of a skyline matrix and associated features + * + * \param MatrixType the type of the matrix of which we are computing the LU factorization + * + */ +template +class SkylineInplaceLU { +protected: + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + + typedef typename NumTraits::Real RealScalar; + +public: + + /** Creates a LU object and compute the respective factorization of \a matrix using + * flags \a flags. */ + SkylineInplaceLU(MatrixType& matrix, int flags = 0) + : /*m_matrix(matrix.rows(), matrix.cols()),*/ m_flags(flags), m_status(0), m_lu(matrix) { + m_precision = RealScalar(0.1) * Eigen::dummy_precision (); + m_lu.IsRowMajor ? computeRowMajor() : compute(); + } + + /** Sets the relative threshold value used to prune zero coefficients during the decomposition. + * + * Setting a value greater than zero speeds up computation, and yields to an incomplete + * factorization with fewer non zero coefficients. Such approximate factors are especially + * useful to initialize an iterative solver. + * + * Note that the exact meaning of this parameter might depends on the actual + * backend. Moreover, not all backends support this feature. + * + * \sa precision() */ + void setPrecision(RealScalar v) { + m_precision = v; + } + + /** \returns the current precision. + * + * \sa setPrecision() */ + RealScalar precision() const { + return m_precision; + } + + /** Sets the flags. Possible values are: + * - CompleteFactorization + * - IncompleteFactorization + * - MemoryEfficient + * - one of the ordering methods + * - etc... + * + * \sa flags() */ + void setFlags(int f) { + m_flags = f; + } + + /** \returns the current flags */ + int flags() const { + return m_flags; + } + + void setOrderingMethod(int m) { + m_flags = m; + } + + int orderingMethod() const { + return m_flags; + } + + /** Computes/re-computes the LU factorization */ + void compute(); + void computeRowMajor(); + + /** \returns the lower triangular matrix L */ + //inline const MatrixType& matrixL() const { return m_matrixL; } + + /** \returns the upper triangular matrix U */ + //inline const MatrixType& matrixU() const { return m_matrixU; } + + template + bool solve(const MatrixBase &b, MatrixBase* x, + const int transposed = 0) const; + + /** \returns true if the factorization succeeded */ + inline bool succeeded(void) const { + return m_succeeded; + } + +protected: + RealScalar m_precision; + int m_flags; + mutable int m_status; + bool m_succeeded; + MatrixType& m_lu; +}; + +/** Computes / recomputes the in place LU decomposition of the SkylineInplaceLU. + * using the default algorithm. 
+ */ +template +//template +void SkylineInplaceLU::compute() { + const size_t rows = m_lu.rows(); + const size_t cols = m_lu.cols(); + + eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); + eigen_assert(!m_lu.IsRowMajor && "LU decomposition does not work with rowMajor Storage"); + + for (Index row = 0; row < rows; row++) { + const double pivot = m_lu.coeffDiag(row); + + //Lower matrix Columns update + const Index& col = row; + for (typename MatrixType::InnerLowerIterator lIt(m_lu, col); lIt; ++lIt) { + lIt.valueRef() /= pivot; + } + + //Upper matrix update -> contiguous memory access + typename MatrixType::InnerLowerIterator lIt(m_lu, col); + for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { + typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); + typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); + const double coef = lIt.value(); + + uItPivot += (rrow - row - 1); + + //update upper part -> contiguous memory access + for (++uItPivot; uIt && uItPivot;) { + uIt.valueRef() -= uItPivot.value() * coef; + + ++uIt; + ++uItPivot; + } + ++lIt; + } + + //Upper matrix update -> non contiguous memory access + typename MatrixType::InnerLowerIterator lIt3(m_lu, col); + for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { + typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); + const double coef = lIt3.value(); + + //update lower part -> non contiguous memory access + for (Index i = 0; i < rrow - row - 1; i++) { + m_lu.coeffRefLower(rrow, row + i + 1) -= uItPivot.value() * coef; + ++uItPivot; + } + ++lIt3; + } + //update diag -> contiguous + typename MatrixType::InnerLowerIterator lIt2(m_lu, col); + for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { + + typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); + typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); + const double coef = lIt2.value(); + + uItPivot += (rrow - row - 1); + m_lu.coeffRefDiag(rrow) -= uItPivot.value() * coef; + ++lIt2; + } + } +} + +template +void SkylineInplaceLU::computeRowMajor() { + const size_t rows = m_lu.rows(); + const size_t cols = m_lu.cols(); + + eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); + eigen_assert(m_lu.IsRowMajor && "You're trying to apply rowMajor decomposition on a ColMajor matrix !"); + + for (Index row = 0; row < rows; row++) { + typename MatrixType::InnerLowerIterator llIt(m_lu, row); + + + for (Index col = llIt.col(); col < row; col++) { + if (m_lu.coeffExistLower(row, col)) { + const double diag = m_lu.coeffDiag(col); + + typename MatrixType::InnerLowerIterator lIt(m_lu, row); + typename MatrixType::InnerUpperIterator uIt(m_lu, col); + + + const Index offset = lIt.col() - uIt.row(); + + + Index stop = offset > 0 ? col - lIt.col() : col - uIt.row(); + + //#define VECTORIZE +#ifdef VECTORIZE + Map rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); + Map colVal(uIt.valuePtr() + (offset > 0 ? 
offset : 0), stop); + + + Scalar newCoeff = m_lu.coeffLower(row, col) - rowVal.dot(colVal); +#else + if (offset > 0) //Skip zero value of lIt + uIt += offset; + else //Skip zero values of uIt + lIt += -offset; + Scalar newCoeff = m_lu.coeffLower(row, col); + + for (Index k = 0; k < stop; ++k) { + const Scalar tmp = newCoeff; + newCoeff = tmp - lIt.value() * uIt.value(); + ++lIt; + ++uIt; + } +#endif + + m_lu.coeffRefLower(row, col) = newCoeff / diag; + } + } + + //Upper matrix update + const Index col = row; + typename MatrixType::InnerUpperIterator uuIt(m_lu, col); + for (Index rrow = uuIt.row(); rrow < col; rrow++) { + + typename MatrixType::InnerLowerIterator lIt(m_lu, rrow); + typename MatrixType::InnerUpperIterator uIt(m_lu, col); + const Index offset = lIt.col() - uIt.row(); + + Index stop = offset > 0 ? rrow - lIt.col() : rrow - uIt.row(); + +#ifdef VECTORIZE + Map rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); + Map colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); + + Scalar newCoeff = m_lu.coeffUpper(rrow, col) - rowVal.dot(colVal); +#else + if (offset > 0) //Skip zero value of lIt + uIt += offset; + else //Skip zero values of uIt + lIt += -offset; + Scalar newCoeff = m_lu.coeffUpper(rrow, col); + for (Index k = 0; k < stop; ++k) { + const Scalar tmp = newCoeff; + newCoeff = tmp - lIt.value() * uIt.value(); + + ++lIt; + ++uIt; + } +#endif + m_lu.coeffRefUpper(rrow, col) = newCoeff; + } + + + //Diag matrix update + typename MatrixType::InnerLowerIterator lIt(m_lu, row); + typename MatrixType::InnerUpperIterator uIt(m_lu, row); + + const Index offset = lIt.col() - uIt.row(); + + + Index stop = offset > 0 ? lIt.size() : uIt.size(); +#ifdef VECTORIZE + Map rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); + Map colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); + Scalar newCoeff = m_lu.coeffDiag(row) - rowVal.dot(colVal); +#else + if (offset > 0) //Skip zero value of lIt + uIt += offset; + else //Skip zero values of uIt + lIt += -offset; + Scalar newCoeff = m_lu.coeffDiag(row); + for (Index k = 0; k < stop; ++k) { + const Scalar tmp = newCoeff; + newCoeff = tmp - lIt.value() * uIt.value(); + ++lIt; + ++uIt; + } +#endif + m_lu.coeffRefDiag(row) = newCoeff; + } +} + +/** Computes *x = U^-1 L^-1 b + * + * If \a transpose is set to SvTranspose or SvAdjoint, the solution + * of the transposed/adjoint system is computed instead. + * + * Not all backends implement the solution of the transposed or + * adjoint system. 
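+ *
+ * A minimal usage sketch (illustrative only; A is assumed to be a square
+ * Eigen::SkylineMatrix<double> and b a dense vector of matching size):
+ * \code
+ * Eigen::SkylineInplaceLU<Eigen::SkylineMatrix<double> > lu(A); // factorizes A in place
+ * Eigen::VectorXd x(b.size());
+ * if (lu.succeeded())
+ *   lu.solve(b, &x);
+ * \endcode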
+ */ +template +template +bool SkylineInplaceLU::solve(const MatrixBase &b, MatrixBase* x, const int transposed) const { + const size_t rows = m_lu.rows(); + const size_t cols = m_lu.cols(); + + + for (Index row = 0; row < rows; row++) { + x->coeffRef(row) = b.coeff(row); + Scalar newVal = x->coeff(row); + typename MatrixType::InnerLowerIterator lIt(m_lu, row); + + Index col = lIt.col(); + while (lIt.col() < row) { + + newVal -= x->coeff(col++) * lIt.value(); + ++lIt; + } + + x->coeffRef(row) = newVal; + } + + + for (Index col = rows - 1; col > 0; col--) { + x->coeffRef(col) = x->coeff(col) / m_lu.coeffDiag(col); + + const Scalar x_col = x->coeff(col); + + typename MatrixType::InnerUpperIterator uIt(m_lu, col); + uIt += uIt.size()-1; + + + while (uIt) { + x->coeffRef(uIt.row()) -= x_col * uIt.value(); + //TODO : introduce --operator + uIt += -1; + } + + + } + x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0); + + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_SKYLINEINPLACELU_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineMatrix.h b/external/unsupported/Eigen/src/Skyline/SkylineMatrix.h new file mode 100644 index 0000000..7c7eace --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineMatrix.h @@ -0,0 +1,862 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKYLINEMATRIX_H +#define EIGEN_SKYLINEMATRIX_H + +#include "SkylineStorage.h" +#include "SkylineMatrixBase.h" + +namespace Eigen { + +/** \ingroup Skyline_Module + * + * \class SkylineMatrix + * + * \brief The main skyline matrix class + * + * This class implements a skyline matrix using the very uncommon storage + * scheme. + * + * \param _Scalar the scalar type, i.e. the type of the coefficients + * \param _Options Union of bit flags controlling the storage scheme. Currently the only possibility + * is RowMajor. The default is 0 which means column-major. + * + * + */ +namespace internal { +template +struct traits > { + typedef _Scalar Scalar; + typedef Sparse StorageKind; + + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic, + MaxRowsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic, + Flags = SkylineBit | _Options, + CoeffReadCost = NumTraits::ReadCost, + }; +}; +} + +template +class SkylineMatrix +: public SkylineMatrixBase > { +public: + EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(SkylineMatrix) + EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(SkylineMatrix, +=) + EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(SkylineMatrix, -=) + + using Base::IsRowMajor; + +protected: + + typedef SkylineMatrix TransposedSkylineMatrix; + + Index m_outerSize; + Index m_innerSize; + +public: + Index* m_colStartIndex; + Index* m_rowStartIndex; + SkylineStorage m_data; + +public: + + inline Index rows() const { + return IsRowMajor ? m_outerSize : m_innerSize; + } + + inline Index cols() const { + return IsRowMajor ? 
m_innerSize : m_outerSize; + } + + inline Index innerSize() const { + return m_innerSize; + } + + inline Index outerSize() const { + return m_outerSize; + } + + inline Index upperNonZeros() const { + return m_data.upperSize(); + } + + inline Index lowerNonZeros() const { + return m_data.lowerSize(); + } + + inline Index upperNonZeros(Index j) const { + return m_colStartIndex[j + 1] - m_colStartIndex[j]; + } + + inline Index lowerNonZeros(Index j) const { + return m_rowStartIndex[j + 1] - m_rowStartIndex[j]; + } + + inline const Scalar* _diagPtr() const { + return &m_data.diag(0); + } + + inline Scalar* _diagPtr() { + return &m_data.diag(0); + } + + inline const Scalar* _upperPtr() const { + return &m_data.upper(0); + } + + inline Scalar* _upperPtr() { + return &m_data.upper(0); + } + + inline const Scalar* _lowerPtr() const { + return &m_data.lower(0); + } + + inline Scalar* _lowerPtr() { + return &m_data.lower(0); + } + + inline const Index* _upperProfilePtr() const { + return &m_data.upperProfile(0); + } + + inline Index* _upperProfilePtr() { + return &m_data.upperProfile(0); + } + + inline const Index* _lowerProfilePtr() const { + return &m_data.lowerProfile(0); + } + + inline Index* _lowerProfilePtr() { + return &m_data.lowerProfile(0); + } + + inline Scalar coeff(Index row, Index col) const { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + + if (outer == inner) + return this->m_data.diag(outer); + + if (IsRowMajor) { + if (inner > outer) //upper matrix + { + const Index minOuterIndex = inner - m_data.upperProfile(inner); + if (outer >= minOuterIndex) + return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); + else + return Scalar(0); + } + if (inner < outer) //lower matrix + { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + if (inner >= minInnerIndex) + return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); + else + return Scalar(0); + } + return m_data.upper(m_colStartIndex[inner] + outer - inner); + } else { + if (outer > inner) //upper matrix + { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + if (outer <= maxOuterIndex) + return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); + else + return Scalar(0); + } + if (outer < inner) //lower matrix + { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + + if (inner <= maxInnerIndex) + return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); + else + return Scalar(0); + } + } + } + + inline Scalar& coeffRef(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? 
col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + + if (outer == inner) + return this->m_data.diag(outer); + + if (IsRowMajor) { + if (col > row) //upper matrix + { + const Index minOuterIndex = inner - m_data.upperProfile(inner); + eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); + } + if (col < row) //lower matrix + { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); + } + } else { + if (outer > inner) //upper matrix + { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); + } + if (outer < inner) //lower matrix + { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); + } + } + } + + inline Scalar coeffDiag(Index idx) const { + eigen_assert(idx < outerSize()); + eigen_assert(idx < innerSize()); + return this->m_data.diag(idx); + } + + inline Scalar coeffLower(Index row, Index col) const { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + if (inner >= minInnerIndex) + return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); + else + return Scalar(0); + + } else { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + if (inner <= maxInnerIndex) + return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); + else + return Scalar(0); + } + } + + inline Scalar coeffUpper(Index row, Index col) const { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minOuterIndex = inner - m_data.upperProfile(inner); + if (outer >= minOuterIndex) + return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); + else + return Scalar(0); + } else { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + if (outer <= maxOuterIndex) + return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); + else + return Scalar(0); + } + } + + inline Scalar& coeffRefDiag(Index idx) { + eigen_assert(idx < outerSize()); + eigen_assert(idx < innerSize()); + return this->m_data.diag(idx); + } + + inline Scalar& coeffRefLower(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? 
col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); + } else { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); + } + } + + inline bool coeffExistLower(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + return inner >= minInnerIndex; + } else { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + return inner <= maxInnerIndex; + } + } + + inline Scalar& coeffRefUpper(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minOuterIndex = inner - m_data.upperProfile(inner); + eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); + } else { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage"); + return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); + } + } + + inline bool coeffExistUpper(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + eigen_assert(inner != outer); + + if (IsRowMajor) { + const Index minOuterIndex = inner - m_data.upperProfile(inner); + return outer >= minOuterIndex; + } else { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + return outer <= maxOuterIndex; + } + } + + +protected: + +public: + class InnerUpperIterator; + class InnerLowerIterator; + + class OuterUpperIterator; + class OuterLowerIterator; + + /** Removes all non zeros */ + inline void setZero() { + m_data.clear(); + memset(m_colStartIndex, 0, (m_outerSize + 1) * sizeof (Index)); + memset(m_rowStartIndex, 0, (m_outerSize + 1) * sizeof (Index)); + } + + /** \returns the number of non zero coefficients */ + inline Index nonZeros() const { + return m_data.diagSize() + m_data.upperSize() + m_data.lowerSize(); + } + + /** Preallocates \a reserveSize non zeros */ + inline void reserve(Index reserveSize, Index reserveUpperSize, Index reserveLowerSize) { + m_data.reserve(reserveSize, reserveUpperSize, reserveLowerSize); + } + + /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col. + + * + * \warning This function can be extremely slow if the non zero coefficients + * are not inserted in a coherent order. + * + * After an insertion session, you should call the finalize() function. 
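+ *
+ * A minimal usage sketch (illustrative only; sizes and values are arbitrary):
+ * \code
+ * Eigen::SkylineMatrix<double> m(4, 4);
+ * m.insert(1, 1) = 2.0;   // returns a reference to the newly created coefficient
+ * m.insert(2, 1) = -1.0;
+ * m.finalize();           // required after an insertion session
+ * \endcode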
+ */ + EIGEN_DONT_INLINE Scalar & insert(Index row, Index col) { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + eigen_assert(outer < outerSize()); + eigen_assert(inner < innerSize()); + + if (outer == inner) + return m_data.diag(col); + + if (IsRowMajor) { + if (outer < inner) //upper matrix + { + Index minOuterIndex = 0; + minOuterIndex = inner - m_data.upperProfile(inner); + + if (outer < minOuterIndex) //The value does not yet exist + { + const Index previousProfile = m_data.upperProfile(inner); + + m_data.upperProfile(inner) = inner - outer; + + + const Index bandIncrement = m_data.upperProfile(inner) - previousProfile; + //shift data stored after this new one + const Index stop = m_colStartIndex[cols()]; + const Index start = m_colStartIndex[inner]; + + + for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { + m_data.upper(innerIdx + bandIncrement) = m_data.upper(innerIdx); + } + + for (Index innerIdx = cols(); innerIdx > inner; innerIdx--) { + m_colStartIndex[innerIdx] += bandIncrement; + } + + //zeros new data + memset(this->_upperPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar)); + + return m_data.upper(m_colStartIndex[inner]); + } else { + return m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); + } + } + + if (outer > inner) //lower matrix + { + const Index minInnerIndex = outer - m_data.lowerProfile(outer); + if (inner < minInnerIndex) //The value does not yet exist + { + const Index previousProfile = m_data.lowerProfile(outer); + m_data.lowerProfile(outer) = outer - inner; + + const Index bandIncrement = m_data.lowerProfile(outer) - previousProfile; + //shift data stored after this new one + const Index stop = m_rowStartIndex[rows()]; + const Index start = m_rowStartIndex[outer]; + + + for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { + m_data.lower(innerIdx + bandIncrement) = m_data.lower(innerIdx); + } + + for (Index innerIdx = rows(); innerIdx > outer; innerIdx--) { + m_rowStartIndex[innerIdx] += bandIncrement; + } + + //zeros new data + memset(this->_lowerPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar)); + return m_data.lower(m_rowStartIndex[outer]); + } else { + return m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); + } + } + } else { + if (outer > inner) //upper matrix + { + const Index maxOuterIndex = inner + m_data.upperProfile(inner); + if (outer > maxOuterIndex) //The value does not yet exist + { + const Index previousProfile = m_data.upperProfile(inner); + m_data.upperProfile(inner) = outer - inner; + + const Index bandIncrement = m_data.upperProfile(inner) - previousProfile; + //shift data stored after this new one + const Index stop = m_rowStartIndex[rows()]; + const Index start = m_rowStartIndex[inner + 1]; + + for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { + m_data.upper(innerIdx + bandIncrement) = m_data.upper(innerIdx); + } + + for (Index innerIdx = inner + 1; innerIdx < outerSize() + 1; innerIdx++) { + m_rowStartIndex[innerIdx] += bandIncrement; + } + memset(this->_upperPtr() + m_rowStartIndex[inner] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar)); + return m_data.upper(m_rowStartIndex[inner] + m_data.upperProfile(inner)); + } else { + return m_data.upper(m_rowStartIndex[inner] + (outer - inner)); + } + } + + if (outer < inner) //lower matrix + { + const Index maxInnerIndex = outer + m_data.lowerProfile(outer); + if (inner > maxInnerIndex) //The value does not yet exist + { + 
const Index previousProfile = m_data.lowerProfile(outer); + m_data.lowerProfile(outer) = inner - outer; + + const Index bandIncrement = m_data.lowerProfile(outer) - previousProfile; + //shift data stored after this new one + const Index stop = m_colStartIndex[cols()]; + const Index start = m_colStartIndex[outer + 1]; + + for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { + m_data.lower(innerIdx + bandIncrement) = m_data.lower(innerIdx); + } + + for (Index innerIdx = outer + 1; innerIdx < outerSize() + 1; innerIdx++) { + m_colStartIndex[innerIdx] += bandIncrement; + } + memset(this->_lowerPtr() + m_colStartIndex[outer] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar)); + return m_data.lower(m_colStartIndex[outer] + m_data.lowerProfile(outer)); + } else { + return m_data.lower(m_colStartIndex[outer] + (inner - outer)); + } + } + } + } + + /** Must be called after inserting a set of non zero entries. + */ + inline void finalize() { + if (IsRowMajor) { + if (rows() > cols()) + m_data.resize(cols(), cols(), rows(), m_colStartIndex[cols()] + 1, m_rowStartIndex[rows()] + 1); + else + m_data.resize(rows(), cols(), rows(), m_colStartIndex[cols()] + 1, m_rowStartIndex[rows()] + 1); + + // eigen_assert(rows() == cols() && "memory reorganisatrion only works with suare matrix"); + // + // Scalar* newArray = new Scalar[m_colStartIndex[cols()] + 1 + m_rowStartIndex[rows()] + 1]; + // Index dataIdx = 0; + // for (Index row = 0; row < rows(); row++) { + // + // const Index nbLowerElts = m_rowStartIndex[row + 1] - m_rowStartIndex[row]; + // // std::cout << "nbLowerElts" << nbLowerElts << std::endl; + // memcpy(newArray + dataIdx, m_data.m_lower + m_rowStartIndex[row], nbLowerElts * sizeof (Scalar)); + // m_rowStartIndex[row] = dataIdx; + // dataIdx += nbLowerElts; + // + // const Index nbUpperElts = m_colStartIndex[row + 1] - m_colStartIndex[row]; + // memcpy(newArray + dataIdx, m_data.m_upper + m_colStartIndex[row], nbUpperElts * sizeof (Scalar)); + // m_colStartIndex[row] = dataIdx; + // dataIdx += nbUpperElts; + // + // + // } + // //todo : don't access m_data profile directly : add an accessor from SkylineMatrix + // m_rowStartIndex[rows()] = m_rowStartIndex[rows()-1] + m_data.lowerProfile(rows()-1); + // m_colStartIndex[cols()] = m_colStartIndex[cols()-1] + m_data.upperProfile(cols()-1); + // + // delete[] m_data.m_lower; + // delete[] m_data.m_upper; + // + // m_data.m_lower = newArray; + // m_data.m_upper = newArray; + } else { + if (rows() > cols()) + m_data.resize(cols(), rows(), cols(), m_rowStartIndex[cols()] + 1, m_colStartIndex[cols()] + 1); + else + m_data.resize(rows(), rows(), cols(), m_rowStartIndex[rows()] + 1, m_colStartIndex[rows()] + 1); + } + } + + inline void squeeze() { + finalize(); + m_data.squeeze(); + } + + void prune(Scalar reference, RealScalar epsilon = dummy_precision ()) { + //TODO + } + + /** Resizes the matrix to a \a rows x \a cols matrix and initializes it to zero + * \sa resizeNonZeros(Index), reserve(), setZero() + */ + void resize(size_t rows, size_t cols) { + const Index diagSize = rows > cols ? cols : rows; + m_innerSize = IsRowMajor ? cols : rows; + + eigen_assert(rows == cols && "Skyline matrix must be square matrix"); + + if (diagSize % 2) { // diagSize is odd + const Index k = (diagSize - 1) / 2; + + m_data.resize(diagSize, IsRowMajor ? cols : rows, IsRowMajor ? rows : cols, + 2 * k * k + k + 1, + 2 * k * k + k + 1); + + } else // diagSize is even + { + const Index k = diagSize / 2; + m_data.resize(diagSize, IsRowMajor ? 
cols : rows, IsRowMajor ? rows : cols, + 2 * k * k - k + 1, + 2 * k * k - k + 1); + } + + if (m_colStartIndex && m_rowStartIndex) { + delete[] m_colStartIndex; + delete[] m_rowStartIndex; + } + m_colStartIndex = new Index [cols + 1]; + m_rowStartIndex = new Index [rows + 1]; + m_outerSize = diagSize; + + m_data.reset(); + m_data.clear(); + + m_outerSize = diagSize; + memset(m_colStartIndex, 0, (cols + 1) * sizeof (Index)); + memset(m_rowStartIndex, 0, (rows + 1) * sizeof (Index)); + } + + void resizeNonZeros(Index size) { + m_data.resize(size); + } + + inline SkylineMatrix() + : m_outerSize(-1), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { + resize(0, 0); + } + + inline SkylineMatrix(size_t rows, size_t cols) + : m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { + resize(rows, cols); + } + + template + inline SkylineMatrix(const SkylineMatrixBase& other) + : m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { + *this = other.derived(); + } + + inline SkylineMatrix(const SkylineMatrix & other) + : Base(), m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { + *this = other.derived(); + } + + inline void swap(SkylineMatrix & other) { + //EIGEN_DBG_SKYLINE(std::cout << "SkylineMatrix:: swap\n"); + std::swap(m_colStartIndex, other.m_colStartIndex); + std::swap(m_rowStartIndex, other.m_rowStartIndex); + std::swap(m_innerSize, other.m_innerSize); + std::swap(m_outerSize, other.m_outerSize); + m_data.swap(other.m_data); + } + + inline SkylineMatrix & operator=(const SkylineMatrix & other) { + std::cout << "SkylineMatrix& operator=(const SkylineMatrix& other)\n"; + if (other.isRValue()) { + swap(other.const_cast_derived()); + } else { + resize(other.rows(), other.cols()); + memcpy(m_colStartIndex, other.m_colStartIndex, (m_outerSize + 1) * sizeof (Index)); + memcpy(m_rowStartIndex, other.m_rowStartIndex, (m_outerSize + 1) * sizeof (Index)); + m_data = other.m_data; + } + return *this; + } + + template + inline SkylineMatrix & operator=(const SkylineMatrixBase& other) { + const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); + if (needToTranspose) { + // TODO + // return *this; + } else { + // there is no special optimization + return SkylineMatrixBase::operator=(other.derived()); + } + } + + friend std::ostream & operator <<(std::ostream & s, const SkylineMatrix & m) { + + EIGEN_DBG_SKYLINE( + std::cout << "upper elements : " << std::endl; + for (Index i = 0; i < m.m_data.upperSize(); i++) + std::cout << m.m_data.upper(i) << "\t"; + std::cout << std::endl; + std::cout << "upper profile : " << std::endl; + for (Index i = 0; i < m.m_data.upperProfileSize(); i++) + std::cout << m.m_data.upperProfile(i) << "\t"; + std::cout << std::endl; + std::cout << "lower startIdx : " << std::endl; + for (Index i = 0; i < m.m_data.upperProfileSize(); i++) + std::cout << (IsRowMajor ? m.m_colStartIndex[i] : m.m_rowStartIndex[i]) << "\t"; + std::cout << std::endl; + + + std::cout << "lower elements : " << std::endl; + for (Index i = 0; i < m.m_data.lowerSize(); i++) + std::cout << m.m_data.lower(i) << "\t"; + std::cout << std::endl; + std::cout << "lower profile : " << std::endl; + for (Index i = 0; i < m.m_data.lowerProfileSize(); i++) + std::cout << m.m_data.lowerProfile(i) << "\t"; + std::cout << std::endl; + std::cout << "lower startIdx : " << std::endl; + for (Index i = 0; i < m.m_data.lowerProfileSize(); i++) + std::cout << (IsRowMajor ? 
m.m_rowStartIndex[i] : m.m_colStartIndex[i]) << "\t"; + std::cout << std::endl; + ); + for (Index rowIdx = 0; rowIdx < m.rows(); rowIdx++) { + for (Index colIdx = 0; colIdx < m.cols(); colIdx++) { + s << m.coeff(rowIdx, colIdx) << "\t"; + } + s << std::endl; + } + return s; + } + + /** Destructor */ + inline ~SkylineMatrix() { + delete[] m_colStartIndex; + delete[] m_rowStartIndex; + } + + /** Overloaded for performance */ + Scalar sum() const; +}; + +template +class SkylineMatrix::InnerUpperIterator { +public: + + InnerUpperIterator(const SkylineMatrix& mat, Index outer) + : m_matrix(mat), m_outer(outer), + m_id(_Options == RowMajor ? mat.m_colStartIndex[outer] : mat.m_rowStartIndex[outer] + 1), + m_start(m_id), + m_end(_Options == RowMajor ? mat.m_colStartIndex[outer + 1] : mat.m_rowStartIndex[outer + 1] + 1) { + } + + inline InnerUpperIterator & operator++() { + m_id++; + return *this; + } + + inline InnerUpperIterator & operator+=(Index shift) { + m_id += shift; + return *this; + } + + inline Scalar value() const { + return m_matrix.m_data.upper(m_id); + } + + inline Scalar* valuePtr() { + return const_cast (&(m_matrix.m_data.upper(m_id))); + } + + inline Scalar& valueRef() { + return const_cast (m_matrix.m_data.upper(m_id)); + } + + inline Index index() const { + return IsRowMajor ? m_outer - m_matrix.m_data.upperProfile(m_outer) + (m_id - m_start) : + m_outer + (m_id - m_start) + 1; + } + + inline Index row() const { + return IsRowMajor ? index() : m_outer; + } + + inline Index col() const { + return IsRowMajor ? m_outer : index(); + } + + inline size_t size() const { + return m_matrix.m_data.upperProfile(m_outer); + } + + inline operator bool() const { + return (m_id < m_end) && (m_id >= m_start); + } + +protected: + const SkylineMatrix& m_matrix; + const Index m_outer; + Index m_id; + const Index m_start; + const Index m_end; +}; + +template +class SkylineMatrix::InnerLowerIterator { +public: + + InnerLowerIterator(const SkylineMatrix& mat, Index outer) + : m_matrix(mat), + m_outer(outer), + m_id(_Options == RowMajor ? mat.m_rowStartIndex[outer] : mat.m_colStartIndex[outer] + 1), + m_start(m_id), + m_end(_Options == RowMajor ? mat.m_rowStartIndex[outer + 1] : mat.m_colStartIndex[outer + 1] + 1) { + } + + inline InnerLowerIterator & operator++() { + m_id++; + return *this; + } + + inline InnerLowerIterator & operator+=(Index shift) { + m_id += shift; + return *this; + } + + inline Scalar value() const { + return m_matrix.m_data.lower(m_id); + } + + inline Scalar* valuePtr() { + return const_cast (&(m_matrix.m_data.lower(m_id))); + } + + inline Scalar& valueRef() { + return const_cast (m_matrix.m_data.lower(m_id)); + } + + inline Index index() const { + return IsRowMajor ? m_outer - m_matrix.m_data.lowerProfile(m_outer) + (m_id - m_start) : + m_outer + (m_id - m_start) + 1; + ; + } + + inline Index row() const { + return IsRowMajor ? m_outer : index(); + } + + inline Index col() const { + return IsRowMajor ? 
index() : m_outer; + } + + inline size_t size() const { + return m_matrix.m_data.lowerProfile(m_outer); + } + + inline operator bool() const { + return (m_id < m_end) && (m_id >= m_start); + } + +protected: + const SkylineMatrix& m_matrix; + const Index m_outer; + Index m_id; + const Index m_start; + const Index m_end; +}; + +} // end namespace Eigen + +#endif // EIGEN_SKYLINEMATRIX_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h b/external/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h new file mode 100644 index 0000000..b0d5e10 --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h @@ -0,0 +1,212 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKYLINEMATRIXBASE_H +#define EIGEN_SKYLINEMATRIXBASE_H + +#include "SkylineUtil.h" + +namespace Eigen { + +/** \ingroup Skyline_Module + * + * \class SkylineMatrixBase + * + * \brief Base class of any skyline matrices or skyline expressions + * + * \param Derived + * + */ +template class SkylineMatrixBase : public EigenBase { +public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::index::type Index; + + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + /**< The number of rows at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ + + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + /**< The number of columns at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ + + + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, + internal::traits::ColsAtCompileTime>::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + + MaxSizeAtCompileTime = (internal::size_at_compile_time::ret), + + IsVectorAtCompileTime = RowsAtCompileTime == 1 || ColsAtCompileTime == 1, + /**< This is set to true if either the number of rows or the number of + * columns is known at compile-time to be equal to 1. Indeed, in that case, + * we are dealing with a column-vector (if there is only one column) or with + * a row-vector (if there is only one row). */ + + Flags = internal::traits::Flags, + /**< This stores expression \ref flags flags which may or may not be inherited by new expressions + * constructed from this one. See the \ref flags "list of flags". + */ + + CoeffReadCost = internal::traits::CoeffReadCost, + /**< This is a rough measure of how expensive it is to read one coefficient from + * this expression. + */ + + IsRowMajor = Flags & RowMajorBit ? 
1 : 0 + }; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is the "real scalar" type; if the \a Scalar type is already real numbers + * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If + * \a Scalar is \a std::complex then RealScalar is \a T. + * + * \sa class NumTraits + */ + typedef typename NumTraits::Real RealScalar; + + /** type of the equivalent square matrix */ + typedef Matrix SquareMatrixType; + + inline const Derived& derived() const { + return *static_cast (this); + } + + inline Derived& derived() { + return *static_cast (this); + } + + inline Derived& const_cast_derived() const { + return *static_cast (const_cast (this)); + } +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { + return derived().rows(); + } + + /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { + return derived().cols(); + } + + /** \returns the number of coefficients, which is \a rows()*cols(). + * \sa rows(), cols(), SizeAtCompileTime. */ + inline EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { + return rows() * cols(); + } + + /** \returns the number of nonzero coefficients which is in practice the number + * of stored coefficients. */ + inline Index nonZeros() const { + return derived().nonZeros(); + } + + /** \returns the size of the storage major dimension, + * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */ + Index outerSize() const { + return (int(Flags) & RowMajorBit) ? this->rows() : this->cols(); + } + + /** \returns the size of the inner dimension according to the storage order, + * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ + Index innerSize() const { + return (int(Flags) & RowMajorBit) ? this->cols() : this->rows(); + } + + bool isRValue() const { + return m_isRValue; + } + + Derived& markAsRValue() { + m_isRValue = true; + return derived(); + } + + SkylineMatrixBase() : m_isRValue(false) { + /* TODO check flags */ + } + + inline Derived & operator=(const Derived& other) { + this->operator= (other); + return derived(); + } + + template + inline void assignGeneric(const OtherDerived& other) { + derived().resize(other.rows(), other.cols()); + for (Index row = 0; row < rows(); row++) + for (Index col = 0; col < cols(); col++) { + if (other.coeff(row, col) != Scalar(0)) + derived().insert(row, col) = other.coeff(row, col); + } + derived().finalize(); + } + + template + inline Derived & operator=(const SkylineMatrixBase& other) { + //TODO + } + + template + inline Derived & operator=(const SkylineProduct& product); + + friend std::ostream & operator <<(std::ostream & s, const SkylineMatrixBase& m) { + s << m.derived(); + return s; + } + + template + const typename SkylineProductReturnType::Type + operator*(const MatrixBase &other) const; + + /** \internal use operator= */ + template + void evalTo(MatrixBase& dst) const { + dst.setZero(); + for (Index i = 0; i < rows(); i++) + for (Index j = 0; j < rows(); j++) + dst(i, j) = derived().coeff(i, j); + } + + Matrix toDense() const { + return derived(); + } + + /** \returns the matrix or vector obtained by evaluating this expression. + * + * Notice that in the case of a plain matrix or vector (not an expression) this function just returns + * a const reference, in order to avoid a useless copy. 
+ */ + EIGEN_STRONG_INLINE const typename internal::eval::type eval() const { + return typename internal::eval::type(derived()); + } + +protected: + bool m_isRValue; +}; + +} // end namespace Eigen + +#endif // EIGEN_SKYLINEMATRIXBASE_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineProduct.h b/external/unsupported/Eigen/src/Skyline/SkylineProduct.h new file mode 100644 index 0000000..d9eb814 --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineProduct.h @@ -0,0 +1,295 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKYLINEPRODUCT_H +#define EIGEN_SKYLINEPRODUCT_H + +namespace Eigen { + +template +struct SkylineProductReturnType { + typedef const typename internal::nested_eval::type LhsNested; + typedef const typename internal::nested_eval::type RhsNested; + + typedef SkylineProduct Type; +}; + +template +struct internal::traits > { + // clean the nested types: + typedef typename internal::remove_all::type _LhsNested; + typedef typename internal::remove_all::type _RhsNested; + typedef typename _LhsNested::Scalar Scalar; + + enum { + LhsCoeffReadCost = _LhsNested::CoeffReadCost, + RhsCoeffReadCost = _RhsNested::CoeffReadCost, + LhsFlags = _LhsNested::Flags, + RhsFlags = _RhsNested::Flags, + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit), + ResultIsSkyline = ProductMode == SkylineTimeSkylineProduct, + + RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit) | (ResultIsSkyline ? 
0 : SkylineBit)), + + Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) + | EvalBeforeAssigningBit + | EvalBeforeNestingBit, + + CoeffReadCost = HugeCost + }; + + typedef typename internal::conditional >, + MatrixBase > >::type Base; +}; + +namespace internal { +template +class SkylineProduct : no_assignment_operator, +public traits >::Base { +public: + + EIGEN_GENERIC_PUBLIC_INTERFACE(SkylineProduct) + +private: + + typedef typename traits::_LhsNested _LhsNested; + typedef typename traits::_RhsNested _RhsNested; + +public: + + template + EIGEN_STRONG_INLINE SkylineProduct(const Lhs& lhs, const Rhs& rhs) + : m_lhs(lhs), m_rhs(rhs) { + eigen_assert(lhs.cols() == rhs.rows()); + + enum { + ProductIsValid = _LhsNested::ColsAtCompileTime == Dynamic + || _RhsNested::RowsAtCompileTime == Dynamic + || int(_LhsNested::ColsAtCompileTime) == int(_RhsNested::RowsAtCompileTime), + AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested, _RhsNested) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwise()*v2 + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + } + + EIGEN_STRONG_INLINE Index rows() const { + return m_lhs.rows(); + } + + EIGEN_STRONG_INLINE Index cols() const { + return m_rhs.cols(); + } + + EIGEN_STRONG_INLINE const _LhsNested& lhs() const { + return m_lhs; + } + + EIGEN_STRONG_INLINE const _RhsNested& rhs() const { + return m_rhs; + } + +protected: + LhsNested m_lhs; + RhsNested m_rhs; +}; + +// dense = skyline * dense +// Note that here we force no inlining and separate the setZero() because GCC messes up otherwise + +template +EIGEN_DONT_INLINE void skyline_row_major_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { + typedef typename remove_all::type _Lhs; + typedef typename remove_all::type _Rhs; + typedef typename traits::Scalar Scalar; + + enum { + LhsIsRowMajor = (_Lhs::Flags & RowMajorBit) == RowMajorBit, + LhsIsSelfAdjoint = (_Lhs::Flags & SelfAdjointBit) == SelfAdjointBit, + ProcessFirstHalf = LhsIsSelfAdjoint + && (((_Lhs::Flags & (UpperTriangularBit | LowerTriangularBit)) == 0) + || ((_Lhs::Flags & UpperTriangularBit) && !LhsIsRowMajor) + || ((_Lhs::Flags & LowerTriangularBit) && LhsIsRowMajor)), + ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf) + }; + + //Use matrix diagonal part <- Improvement : use inner iterator on dense matrix. 
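+    // Note: the product is accumulated in three passes. The diagonal pass below writes
+    // dst directly; the lower- and upper-triangular passes that follow add their
+    // contributions, rewinding each inner iterator (it += -it.size()) before moving on
+    // to the next rhs column.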
+ for (Index col = 0; col < rhs.cols(); col++) { + for (Index row = 0; row < lhs.rows(); row++) { + dst(row, col) = lhs.coeffDiag(row) * rhs(row, col); + } + } + //Use matrix lower triangular part + for (Index row = 0; row < lhs.rows(); row++) { + typename _Lhs::InnerLowerIterator lIt(lhs, row); + const Index stop = lIt.col() + lIt.size(); + for (Index col = 0; col < rhs.cols(); col++) { + + Index k = lIt.col(); + Scalar tmp = 0; + while (k < stop) { + tmp += + lIt.value() * + rhs(k++, col); + ++lIt; + } + dst(row, col) += tmp; + lIt += -lIt.size(); + } + + } + + //Use matrix upper triangular part + for (Index lhscol = 0; lhscol < lhs.cols(); lhscol++) { + typename _Lhs::InnerUpperIterator uIt(lhs, lhscol); + const Index stop = uIt.size() + uIt.row(); + for (Index rhscol = 0; rhscol < rhs.cols(); rhscol++) { + + + const Scalar rhsCoeff = rhs.coeff(lhscol, rhscol); + Index k = uIt.row(); + while (k < stop) { + dst(k++, rhscol) += + uIt.value() * + rhsCoeff; + ++uIt; + } + uIt += -uIt.size(); + } + } + +} + +template +EIGEN_DONT_INLINE void skyline_col_major_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { + typedef typename remove_all::type _Lhs; + typedef typename remove_all::type _Rhs; + typedef typename traits::Scalar Scalar; + + enum { + LhsIsRowMajor = (_Lhs::Flags & RowMajorBit) == RowMajorBit, + LhsIsSelfAdjoint = (_Lhs::Flags & SelfAdjointBit) == SelfAdjointBit, + ProcessFirstHalf = LhsIsSelfAdjoint + && (((_Lhs::Flags & (UpperTriangularBit | LowerTriangularBit)) == 0) + || ((_Lhs::Flags & UpperTriangularBit) && !LhsIsRowMajor) + || ((_Lhs::Flags & LowerTriangularBit) && LhsIsRowMajor)), + ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf) + }; + + //Use matrix diagonal part <- Improvement : use inner iterator on dense matrix. 
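+    // Note: same three-pass scheme as the row-major kernel above; only the roles of the
+    // upper and lower skyline parts are swapped to match column-major storage.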
+ for (Index col = 0; col < rhs.cols(); col++) { + for (Index row = 0; row < lhs.rows(); row++) { + dst(row, col) = lhs.coeffDiag(row) * rhs(row, col); + } + } + + //Use matrix upper triangular part + for (Index row = 0; row < lhs.rows(); row++) { + typename _Lhs::InnerUpperIterator uIt(lhs, row); + const Index stop = uIt.col() + uIt.size(); + for (Index col = 0; col < rhs.cols(); col++) { + + Index k = uIt.col(); + Scalar tmp = 0; + while (k < stop) { + tmp += + uIt.value() * + rhs(k++, col); + ++uIt; + } + + + dst(row, col) += tmp; + uIt += -uIt.size(); + } + } + + //Use matrix lower triangular part + for (Index lhscol = 0; lhscol < lhs.cols(); lhscol++) { + typename _Lhs::InnerLowerIterator lIt(lhs, lhscol); + const Index stop = lIt.size() + lIt.row(); + for (Index rhscol = 0; rhscol < rhs.cols(); rhscol++) { + + const Scalar rhsCoeff = rhs.coeff(lhscol, rhscol); + Index k = lIt.row(); + while (k < stop) { + dst(k++, rhscol) += + lIt.value() * + rhsCoeff; + ++lIt; + } + lIt += -lIt.size(); + } + } + +} + +template::Flags&RowMajorBit> + struct skyline_product_selector; + +template +struct skyline_product_selector { + typedef typename traits::type>::Scalar Scalar; + + static void run(const Lhs& lhs, const Rhs& rhs, ResultType & res) { + skyline_row_major_time_dense_product (lhs, rhs, res); + } +}; + +template +struct skyline_product_selector { + typedef typename traits::type>::Scalar Scalar; + + static void run(const Lhs& lhs, const Rhs& rhs, ResultType & res) { + skyline_col_major_time_dense_product (lhs, rhs, res); + } +}; + +} // end namespace internal + +// template +// template +// Derived & MatrixBase::lazyAssign(const SkylineProduct& product) { +// typedef typename internal::remove_all::type _Lhs; +// internal::skyline_product_selector::type, +// typename internal::remove_all::type, +// Derived>::run(product.lhs(), product.rhs(), derived()); +// +// return derived(); +// } + +// skyline * dense + +template +template +EIGEN_STRONG_INLINE const typename SkylineProductReturnType::Type +SkylineMatrixBase::operator*(const MatrixBase &other) const { + + return typename SkylineProductReturnType::Type(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_SKYLINEPRODUCT_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineStorage.h b/external/unsupported/Eigen/src/Skyline/SkylineStorage.h new file mode 100644 index 0000000..cc7514f --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineStorage.h @@ -0,0 +1,259 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SKYLINE_STORAGE_H +#define EIGEN_SKYLINE_STORAGE_H + +namespace Eigen { + +/** Stores a skyline set of values in three structures : + * The diagonal elements + * The upper elements + * The lower elements + * + */ +template +class SkylineStorage { + typedef typename NumTraits::Real RealScalar; + typedef SparseIndex Index; +public: + + SkylineStorage() + : m_diag(0), + m_lower(0), + m_upper(0), + m_lowerProfile(0), + m_upperProfile(0), + m_diagSize(0), + m_upperSize(0), + m_lowerSize(0), + m_upperProfileSize(0), + m_lowerProfileSize(0), + m_allocatedSize(0) { + } + + SkylineStorage(const SkylineStorage& other) + : m_diag(0), + m_lower(0), + m_upper(0), + m_lowerProfile(0), + m_upperProfile(0), + m_diagSize(0), + m_upperSize(0), + m_lowerSize(0), + m_upperProfileSize(0), + m_lowerProfileSize(0), + m_allocatedSize(0) { + *this = other; + } + + SkylineStorage & operator=(const SkylineStorage& other) { + resize(other.diagSize(), other.m_upperProfileSize, other.m_lowerProfileSize, other.upperSize(), other.lowerSize()); + memcpy(m_diag, other.m_diag, m_diagSize * sizeof (Scalar)); + memcpy(m_upper, other.m_upper, other.upperSize() * sizeof (Scalar)); + memcpy(m_lower, other.m_lower, other.lowerSize() * sizeof (Scalar)); + memcpy(m_upperProfile, other.m_upperProfile, m_upperProfileSize * sizeof (Index)); + memcpy(m_lowerProfile, other.m_lowerProfile, m_lowerProfileSize * sizeof (Index)); + return *this; + } + + void swap(SkylineStorage& other) { + std::swap(m_diag, other.m_diag); + std::swap(m_upper, other.m_upper); + std::swap(m_lower, other.m_lower); + std::swap(m_upperProfile, other.m_upperProfile); + std::swap(m_lowerProfile, other.m_lowerProfile); + std::swap(m_diagSize, other.m_diagSize); + std::swap(m_upperSize, other.m_upperSize); + std::swap(m_lowerSize, other.m_lowerSize); + std::swap(m_allocatedSize, other.m_allocatedSize); + } + + ~SkylineStorage() { + delete[] m_diag; + delete[] m_upper; + if (m_upper != m_lower) + delete[] m_lower; + delete[] m_upperProfile; + delete[] m_lowerProfile; + } + + void reserve(Index size, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) { + Index newAllocatedSize = size + upperSize + lowerSize; + if (newAllocatedSize > m_allocatedSize) + reallocate(size, upperProfileSize, lowerProfileSize, upperSize, lowerSize); + } + + void squeeze() { + if (m_allocatedSize > m_diagSize + m_upperSize + m_lowerSize) + reallocate(m_diagSize, m_upperProfileSize, m_lowerProfileSize, m_upperSize, m_lowerSize); + } + + void resize(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize, float reserveSizeFactor = 0) { + if (m_allocatedSize < diagSize + upperSize + lowerSize) + reallocate(diagSize, upperProfileSize, lowerProfileSize, upperSize + Index(reserveSizeFactor * upperSize), lowerSize + Index(reserveSizeFactor * lowerSize)); + m_diagSize = diagSize; + m_upperSize = upperSize; + m_lowerSize = lowerSize; + m_upperProfileSize = upperProfileSize; + m_lowerProfileSize = lowerProfileSize; + } + + inline Index diagSize() const { + return m_diagSize; + } + + inline Index upperSize() const { + return m_upperSize; + } + + inline Index lowerSize() const { + return m_lowerSize; + } + + inline Index upperProfileSize() const { + return m_upperProfileSize; + } + + inline Index lowerProfileSize() const { + return m_lowerProfileSize; + } + + inline Index allocatedSize() const { + return m_allocatedSize; + } + + inline void clear() { + m_diagSize = 0; + } + + inline Scalar& diag(Index i) { + return 
m_diag[i]; + } + + inline const Scalar& diag(Index i) const { + return m_diag[i]; + } + + inline Scalar& upper(Index i) { + return m_upper[i]; + } + + inline const Scalar& upper(Index i) const { + return m_upper[i]; + } + + inline Scalar& lower(Index i) { + return m_lower[i]; + } + + inline const Scalar& lower(Index i) const { + return m_lower[i]; + } + + inline Index& upperProfile(Index i) { + return m_upperProfile[i]; + } + + inline const Index& upperProfile(Index i) const { + return m_upperProfile[i]; + } + + inline Index& lowerProfile(Index i) { + return m_lowerProfile[i]; + } + + inline const Index& lowerProfile(Index i) const { + return m_lowerProfile[i]; + } + + static SkylineStorage Map(Index* upperProfile, Index* lowerProfile, Scalar* diag, Scalar* upper, Scalar* lower, Index size, Index upperSize, Index lowerSize) { + SkylineStorage res; + res.m_upperProfile = upperProfile; + res.m_lowerProfile = lowerProfile; + res.m_diag = diag; + res.m_upper = upper; + res.m_lower = lower; + res.m_allocatedSize = res.m_diagSize = size; + res.m_upperSize = upperSize; + res.m_lowerSize = lowerSize; + return res; + } + + inline void reset() { + memset(m_diag, 0, m_diagSize * sizeof (Scalar)); + memset(m_upper, 0, m_upperSize * sizeof (Scalar)); + memset(m_lower, 0, m_lowerSize * sizeof (Scalar)); + memset(m_upperProfile, 0, m_diagSize * sizeof (Index)); + memset(m_lowerProfile, 0, m_diagSize * sizeof (Index)); + } + + void prune(Scalar reference, RealScalar epsilon = dummy_precision()) { + //TODO + } + +protected: + + inline void reallocate(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) { + + Scalar* diag = new Scalar[diagSize]; + Scalar* upper = new Scalar[upperSize]; + Scalar* lower = new Scalar[lowerSize]; + Index* upperProfile = new Index[upperProfileSize]; + Index* lowerProfile = new Index[lowerProfileSize]; + + Index copyDiagSize = (std::min)(diagSize, m_diagSize); + Index copyUpperSize = (std::min)(upperSize, m_upperSize); + Index copyLowerSize = (std::min)(lowerSize, m_lowerSize); + Index copyUpperProfileSize = (std::min)(upperProfileSize, m_upperProfileSize); + Index copyLowerProfileSize = (std::min)(lowerProfileSize, m_lowerProfileSize); + + // copy + memcpy(diag, m_diag, copyDiagSize * sizeof (Scalar)); + memcpy(upper, m_upper, copyUpperSize * sizeof (Scalar)); + memcpy(lower, m_lower, copyLowerSize * sizeof (Scalar)); + memcpy(upperProfile, m_upperProfile, copyUpperProfileSize * sizeof (Index)); + memcpy(lowerProfile, m_lowerProfile, copyLowerProfileSize * sizeof (Index)); + + + + // delete old stuff + delete[] m_diag; + delete[] m_upper; + delete[] m_lower; + delete[] m_upperProfile; + delete[] m_lowerProfile; + m_diag = diag; + m_upper = upper; + m_lower = lower; + m_upperProfile = upperProfile; + m_lowerProfile = lowerProfile; + m_allocatedSize = diagSize + upperSize + lowerSize; + m_upperSize = upperSize; + m_lowerSize = lowerSize; + } + +public: + Scalar* m_diag; + Scalar* m_upper; + Scalar* m_lower; + Index* m_upperProfile; + Index* m_lowerProfile; + Index m_diagSize; + Index m_upperSize; + Index m_lowerSize; + Index m_upperProfileSize; + Index m_lowerProfileSize; + Index m_allocatedSize; + +}; + +} // end namespace Eigen + +#endif // EIGEN_SKYLINE_STORAGE_H diff --git a/external/unsupported/Eigen/src/Skyline/SkylineUtil.h b/external/unsupported/Eigen/src/Skyline/SkylineUtil.h new file mode 100644 index 0000000..75eb612 --- /dev/null +++ b/external/unsupported/Eigen/src/Skyline/SkylineUtil.h @@ -0,0 +1,89 @@ +// This file is 
part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Guillaume Saupin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SKYLINEUTIL_H +#define EIGEN_SKYLINEUTIL_H + +namespace Eigen { + +#ifdef NDEBUG +#define EIGEN_DBG_SKYLINE(X) +#else +#define EIGEN_DBG_SKYLINE(X) X +#endif + +const unsigned int SkylineBit = 0x1200; +template class SkylineProduct; +enum AdditionalProductEvaluationMode {SkylineTimeDenseProduct, SkylineTimeSkylineProduct, DenseTimeSkylineProduct}; +enum {IsSkyline = SkylineBit}; + + +#define EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ +template \ +EIGEN_STRONG_INLINE Derived& operator Op(const Eigen::SkylineMatrixBase& other) \ +{ \ + return Base::operator Op(other.derived()); \ +} \ +EIGEN_STRONG_INLINE Derived& operator Op(const Derived& other) \ +{ \ + return Base::operator Op(other); \ +} + +#define EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, Op) \ +template \ +EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ +{ \ + return Base::operator Op(scalar); \ +} + +#define EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ + EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \ + EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \ + EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \ + EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \ + EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=) + +#define _EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived, BaseClass) \ + typedef BaseClass Base; \ + typedef typename Eigen::internal::traits::Scalar Scalar; \ + typedef typename Eigen::NumTraits::Real RealScalar; \ + typedef typename Eigen::internal::traits::StorageKind StorageKind; \ + typedef typename Eigen::internal::index::type Index; \ + enum { Flags = Eigen::internal::traits::Flags, }; + +#define EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived) \ + _EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::SkylineMatrixBase) + +template class SkylineMatrixBase; +template class SkylineMatrix; +template class DynamicSkylineMatrix; +template class SkylineVector; +template class MappedSkylineMatrix; + +namespace internal { + +template struct skyline_product_mode; +template::value> struct SkylineProductReturnType; + +template class eval +{ + typedef typename traits::Scalar _Scalar; + enum { + _Flags = traits::Flags + }; + + public: + typedef SkylineMatrix<_Scalar, _Flags> type; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SKYLINEUTIL_H diff --git a/external/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h b/external/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h new file mode 100644 index 0000000..e9ec746 --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H +#define EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H + +namespace Eigen { + +#if 0 + +// NOTE Have to be reimplemented as a specialization of BlockImpl< DynamicSparseMatrix<_Scalar, _Options, _Index>, ... > +// See SparseBlock.h for an example + + +/*************************************************************************** +* specialisation for DynamicSparseMatrix +***************************************************************************/ + +template +class SparseInnerVectorSet, Size> + : public SparseMatrixBase, Size> > +{ + typedef DynamicSparseMatrix<_Scalar, _Options, _Index> MatrixType; + public: + + enum { IsRowMajor = internal::traits::IsRowMajor }; + + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseInnerVectorSet) + class InnerIterator: public MatrixType::InnerIterator + { + public: + inline InnerIterator(const SparseInnerVectorSet& xpr, Index outer) + : MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) + {} + inline Index row() const { return IsRowMajor ? m_outer : this->index(); } + inline Index col() const { return IsRowMajor ? this->index() : m_outer; } + protected: + Index m_outer; + }; + + inline SparseInnerVectorSet(const MatrixType& matrix, Index outerStart, Index outerSize) + : m_matrix(matrix), m_outerStart(outerStart), m_outerSize(outerSize) + { + eigen_assert( (outerStart>=0) && ((outerStart+outerSize)<=matrix.outerSize()) ); + } + + inline SparseInnerVectorSet(const MatrixType& matrix, Index outer) + : m_matrix(matrix), m_outerStart(outer), m_outerSize(Size) + { + eigen_assert(Size!=Dynamic); + eigen_assert( (outer>=0) && (outer + inline SparseInnerVectorSet& operator=(const SparseMatrixBase& other) + { + if (IsRowMajor != ((OtherDerived::Flags&RowMajorBit)==RowMajorBit)) + { + // need to transpose => perform a block evaluation followed by a big swap + DynamicSparseMatrix aux(other); + *this = aux.markAsRValue(); + } + else + { + // evaluate/copy vector per vector + for (Index j=0; j aux(other.innerVector(j)); + m_matrix.const_cast_derived()._data()[m_outerStart+j].swap(aux._data()); + } + } + return *this; + } + + inline SparseInnerVectorSet& operator=(const SparseInnerVectorSet& other) + { + return operator=(other); + } + + Index nonZeros() const + { + Index count = 0; + for (Index j=0; j0); + return m_matrix.data()[m_outerStart].vale(m_matrix.data()[m_outerStart].size()-1); + } + +// template +// inline SparseInnerVectorSet& operator=(const SparseMatrixBase& other) +// { +// return *this; +// } + + EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } + + protected: + + const typename MatrixType::Nested m_matrix; + Index m_outerStart; + const internal::variable_if_dynamic m_outerSize; + +}; + +#endif + +} // end namespace Eigen + +#endif // EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H diff --git a/external/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h b/external/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h new file mode 100644 index 0000000..536a0c3 --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h @@ -0,0 +1,1079 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Desire Nuentsa +// Copyright (C) 2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSEBLOCKMATRIX_H +#define EIGEN_SPARSEBLOCKMATRIX_H + +namespace Eigen { +/** \ingroup SparseCore_Module + * + * \class BlockSparseMatrix + * + * \brief A versatile sparse matrix representation where each element is a block + * + * This class provides routines to manipulate block sparse matrices stored in a + * BSR-like representation. There are two main types : + * + * 1. All blocks have the same number of rows and columns, called block size + * in the following. In this case, if this block size is known at compile time, + * it can be given as a template parameter like + * \code + * BlockSparseMatrix bmat(b_rows, b_cols); + * \endcode + * Here, bmat is a b_rows x b_cols block sparse matrix + * where each coefficient is a 3x3 dense matrix. + * If the block size is fixed but will be given at runtime, + * \code + * BlockSparseMatrix bmat(b_rows, b_cols); + * bmat.setBlockSize(block_size); + * \endcode + * + * 2. The second case is for variable-block sparse matrices. + * Here each block has its own dimensions. The only restriction is that all the blocks + * in a row (resp. a column) should have the same number of rows (resp. of columns). + * It is thus required in this case to describe the layout of the matrix by calling + * setBlockLayout(rowBlocks, colBlocks). + * + * In any of the previous case, the matrix can be filled by calling setFromTriplets(). + * A regular sparse matrix can be converted to a block sparse matrix and vice versa. + * It is obviously required to describe the block layout beforehand by calling either + * setBlockSize() for fixed-size blocks or setBlockLayout for variable-size blocks. + * + * \tparam _Scalar The Scalar type + * \tparam _BlockAtCompileTime The block layout option. It takes the following values + * Dynamic : block size known at runtime + * a numeric number : fixed-size block known at compile time + */ +template class BlockSparseMatrix; + +template class BlockSparseMatrixView; + +namespace internal { +template +struct traits > +{ + typedef _Scalar Scalar; + typedef _Index Index; + typedef Sparse StorageKind; // FIXME Where is it used ?? 
+ typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic, + MaxRowsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic, + BlockSize = _BlockAtCompileTime, + Flags = _Options | NestByRefBit | LvalueBit, + CoeffReadCost = NumTraits::ReadCost, + SupportedAccessPatterns = InnerRandomAccessPattern + }; +}; +template +struct traits > +{ + typedef Ref > Scalar; + typedef Ref > RealScalar; + +}; + +// Function object to sort a triplet list +template +struct TripletComp +{ + typedef typename Iterator::value_type Triplet; + bool operator()(const Triplet& a, const Triplet& b) + { if(IsColMajor) + return ((a.col() == b.col() && a.row() < b.row()) || (a.col() < b.col())); + else + return ((a.row() == b.row() && a.col() < b.col()) || (a.row() < b.row())); + } +}; +} // end namespace internal + + +/* Proxy to view the block sparse matrix as a regular sparse matrix */ +template +class BlockSparseMatrixView : public SparseMatrixBase +{ + public: + typedef Ref Scalar; + typedef Ref RealScalar; + typedef typename BlockSparseMatrixT::Index Index; + typedef BlockSparseMatrixT Nested; + enum { + Flags = BlockSparseMatrixT::Options, + Options = BlockSparseMatrixT::Options, + RowsAtCompileTime = BlockSparseMatrixT::RowsAtCompileTime, + ColsAtCompileTime = BlockSparseMatrixT::ColsAtCompileTime, + MaxColsAtCompileTime = BlockSparseMatrixT::MaxColsAtCompileTime, + MaxRowsAtCompileTime = BlockSparseMatrixT::MaxRowsAtCompileTime + }; + public: + BlockSparseMatrixView(const BlockSparseMatrixT& spblockmat) + : m_spblockmat(spblockmat) + {} + + Index outerSize() const + { + return (Flags&RowMajorBit) == 1 ? this->rows() : this->cols(); + } + Index cols() const + { + return m_spblockmat.blockCols(); + } + Index rows() const + { + return m_spblockmat.blockRows(); + } + Scalar coeff(Index row, Index col) + { + return m_spblockmat.coeff(row, col); + } + Scalar coeffRef(Index row, Index col) + { + return m_spblockmat.coeffRef(row, col); + } + // Wrapper to iterate over all blocks + class InnerIterator : public BlockSparseMatrixT::BlockInnerIterator + { + public: + InnerIterator(const BlockSparseMatrixView& mat, Index outer) + : BlockSparseMatrixT::BlockInnerIterator(mat.m_spblockmat, outer) + {} + + }; + + protected: + const BlockSparseMatrixT& m_spblockmat; +}; + +// Proxy to view a regular vector as a block vector +template +class BlockVectorView +{ + public: + enum { + BlockSize = BlockSparseMatrixT::BlockSize, + ColsAtCompileTime = VectorType::ColsAtCompileTime, + RowsAtCompileTime = VectorType::RowsAtCompileTime, + Flags = VectorType::Flags + }; + typedef Ref >Scalar; + typedef typename BlockSparseMatrixT::Index Index; + public: + BlockVectorView(const BlockSparseMatrixT& spblockmat, const VectorType& vec) + : m_spblockmat(spblockmat),m_vec(vec) + { } + inline Index cols() const + { + return m_vec.cols(); + } + inline Index size() const + { + return m_spblockmat.blockRows(); + } + inline Scalar coeff(Index bi) const + { + Index startRow = m_spblockmat.blockRowsIndex(bi); + Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; + return m_vec.middleRows(startRow, rowSize); + } + inline Scalar coeff(Index bi, Index j) const + { + Index startRow = m_spblockmat.blockRowsIndex(bi); + Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; + return m_vec.block(startRow, j, rowSize, 1); + } + protected: + const BlockSparseMatrixT& m_spblockmat; + const VectorType& m_vec; +}; + +template class BlockVectorReturn; + + +// Proxy to view a regular vector as a block vector 
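+// (writable counterpart of BlockVectorView: it exposes coeffRef() so it can serve as the
+// destination proxy in BlockSparseTimeDenseProduct::scaleAndAddTo)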
+template +class BlockVectorReturn +{ + public: + enum { + ColsAtCompileTime = VectorType::ColsAtCompileTime, + RowsAtCompileTime = VectorType::RowsAtCompileTime, + Flags = VectorType::Flags + }; + typedef Ref > Scalar; + typedef typename BlockSparseMatrixT::Index Index; + public: + BlockVectorReturn(const BlockSparseMatrixT& spblockmat, VectorType& vec) + : m_spblockmat(spblockmat),m_vec(vec) + { } + inline Index size() const + { + return m_spblockmat.blockRows(); + } + inline Scalar coeffRef(Index bi) + { + Index startRow = m_spblockmat.blockRowsIndex(bi); + Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; + return m_vec.middleRows(startRow, rowSize); + } + inline Scalar coeffRef(Index bi, Index j) + { + Index startRow = m_spblockmat.blockRowsIndex(bi); + Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; + return m_vec.block(startRow, j, rowSize, 1); + } + + protected: + const BlockSparseMatrixT& m_spblockmat; + VectorType& m_vec; +}; + +// Block version of the sparse dense product +template +class BlockSparseTimeDenseProduct; + +namespace internal { + +template +struct traits > +{ + typedef Dense StorageKind; + typedef MatrixXpr XprKind; + typedef typename BlockSparseMatrixT::Scalar Scalar; + typedef typename BlockSparseMatrixT::Index Index; + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic, + MaxRowsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic, + Flags = 0, + CoeffReadCost = internal::traits::CoeffReadCost + }; +}; +} // end namespace internal + +template +class BlockSparseTimeDenseProduct + : public ProductBase, Lhs, Rhs> +{ + public: + EIGEN_PRODUCT_PUBLIC_INTERFACE(BlockSparseTimeDenseProduct) + + BlockSparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) + {} + + template void scaleAndAddTo(Dest& dest, const typename Rhs::Scalar& alpha) const + { + BlockVectorReturn tmpDest(m_lhs, dest); + internal::sparse_time_dense_product( BlockSparseMatrixView(m_lhs), BlockVectorView(m_lhs, m_rhs), tmpDest, alpha); + } + + private: + BlockSparseTimeDenseProduct& operator=(const BlockSparseTimeDenseProduct&); +}; + +template +class BlockSparseMatrix : public SparseMatrixBase > +{ + public: + typedef _Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef _StorageIndex StorageIndex; + typedef typename internal::ref_selector >::type Nested; + + enum { + Options = _Options, + Flags = Options, + BlockSize=_BlockAtCompileTime, + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic, + MaxRowsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic, + IsVectorAtCompileTime = 0, + IsColMajor = Flags&RowMajorBit ? 0 : 1 + }; + typedef Matrix BlockScalar; + typedef Matrix BlockRealScalar; + typedef typename internal::conditional<_BlockAtCompileTime==Dynamic, Scalar, BlockScalar>::type BlockScalarReturnType; + typedef BlockSparseMatrix PlainObject; + public: + // Default constructor + BlockSparseMatrix() + : m_innerBSize(0),m_outerBSize(0),m_innerOffset(0),m_outerOffset(0), + m_nonzerosblocks(0),m_values(0),m_blockPtr(0),m_indices(0), + m_outerIndex(0),m_blockSize(BlockSize) + { } + + + /** + * \brief Construct and resize + * + */ + BlockSparseMatrix(Index brow, Index bcol) + : m_innerBSize(IsColMajor ? brow : bcol), + m_outerBSize(IsColMajor ? 
bcol : brow), + m_innerOffset(0),m_outerOffset(0),m_nonzerosblocks(0), + m_values(0),m_blockPtr(0),m_indices(0), + m_outerIndex(0),m_blockSize(BlockSize) + { } + + /** + * \brief Copy-constructor + */ + BlockSparseMatrix(const BlockSparseMatrix& other) + : m_innerBSize(other.m_innerBSize),m_outerBSize(other.m_outerBSize), + m_nonzerosblocks(other.m_nonzerosblocks),m_nonzeros(other.m_nonzeros), + m_blockPtr(0),m_blockSize(other.m_blockSize) + { + // should we allow copying between variable-size blocks and fixed-size blocks ?? + eigen_assert(m_blockSize == BlockSize && " CAN NOT COPY BETWEEN FIXED-SIZE AND VARIABLE-SIZE BLOCKS"); + + std::copy(other.m_innerOffset, other.m_innerOffset+m_innerBSize+1, m_innerOffset); + std::copy(other.m_outerOffset, other.m_outerOffset+m_outerBSize+1, m_outerOffset); + std::copy(other.m_values, other.m_values+m_nonzeros, m_values); + + if(m_blockSize != Dynamic) + std::copy(other.m_blockPtr, other.m_blockPtr+m_nonzerosblocks, m_blockPtr); + + std::copy(other.m_indices, other.m_indices+m_nonzerosblocks, m_indices); + std::copy(other.m_outerIndex, other.m_outerIndex+m_outerBSize, m_outerIndex); + } + + friend void swap(BlockSparseMatrix& first, BlockSparseMatrix& second) + { + std::swap(first.m_innerBSize, second.m_innerBSize); + std::swap(first.m_outerBSize, second.m_outerBSize); + std::swap(first.m_innerOffset, second.m_innerOffset); + std::swap(first.m_outerOffset, second.m_outerOffset); + std::swap(first.m_nonzerosblocks, second.m_nonzerosblocks); + std::swap(first.m_nonzeros, second.m_nonzeros); + std::swap(first.m_values, second.m_values); + std::swap(first.m_blockPtr, second.m_blockPtr); + std::swap(first.m_indices, second.m_indices); + std::swap(first.m_outerIndex, second.m_outerIndex); + std::swap(first.m_BlockSize, second.m_blockSize); + } + + BlockSparseMatrix& operator=(BlockSparseMatrix other) + { + //Copy-and-swap paradigm ... avoid leaked data if thrown + swap(*this, other); + return *this; + } + + // Destructor + ~BlockSparseMatrix() + { + delete[] m_outerIndex; + delete[] m_innerOffset; + delete[] m_outerOffset; + delete[] m_indices; + delete[] m_blockPtr; + delete[] m_values; + } + + + /** + * \brief Constructor from a sparse matrix + * + */ + template + inline BlockSparseMatrix(const MatrixType& spmat) : m_blockSize(BlockSize) + { + EIGEN_STATIC_ASSERT((m_blockSize != Dynamic), THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE); + + *this = spmat; + } + + /** + * \brief Assignment from a sparse matrix with the same storage order + * + * Convert from a sparse matrix to block sparse matrix. + * \warning Before calling this function, tt is necessary to call + * either setBlockLayout() (matrices with variable-size blocks) + * or setBlockSize() (for fixed-size blocks). 
+ */ + template + inline BlockSparseMatrix& operator=(const MatrixType& spmat) + { + eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) + && "Trying to assign to a zero-size matrix, call resize() first"); + eigen_assert(((MatrixType::Options&RowMajorBit) != IsColMajor) && "Wrong storage order"); + typedef SparseMatrix MatrixPatternType; + MatrixPatternType blockPattern(blockRows(), blockCols()); + m_nonzeros = 0; + + // First, compute the number of nonzero blocks and their locations + for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) + { + // Browse each outer block and compute the structure + std::vector nzblocksFlag(m_innerBSize,false); // Record the existing blocks + blockPattern.startVec(bj); + for(StorageIndex j = blockOuterIndex(bj); j < blockOuterIndex(bj+1); ++j) + { + typename MatrixType::InnerIterator it_spmat(spmat, j); + for(; it_spmat; ++it_spmat) + { + StorageIndex bi = innerToBlock(it_spmat.index()); // Index of the current nonzero block + if(!nzblocksFlag[bi]) + { + // Save the index of this nonzero block + nzblocksFlag[bi] = true; + blockPattern.insertBackByOuterInnerUnordered(bj, bi) = true; + // Compute the total number of nonzeros (including explicit zeros in blocks) + m_nonzeros += blockOuterSize(bj) * blockInnerSize(bi); + } + } + } // end current outer block + } + blockPattern.finalize(); + + // Allocate the internal arrays + setBlockStructure(blockPattern); + + for(StorageIndex nz = 0; nz < m_nonzeros; ++nz) m_values[nz] = Scalar(0); + for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) + { + // Now copy the values + for(StorageIndex j = blockOuterIndex(bj); j < blockOuterIndex(bj+1); ++j) + { + // Browse the outer block column by column (for column-major matrices) + typename MatrixType::InnerIterator it_spmat(spmat, j); + for(; it_spmat; ++it_spmat) + { + StorageIndex idx = 0; // Position of this block in the column block + StorageIndex bi = innerToBlock(it_spmat.index()); // Index of the current nonzero block + // Go to the inner block where this element belongs to + while(bi > m_indices[m_outerIndex[bj]+idx]) ++idx; // Not expensive for ordered blocks + StorageIndex idxVal;// Get the right position in the array of values for this element + if(m_blockSize == Dynamic) + { + // Offset from all blocks before ... + idxVal = m_blockPtr[m_outerIndex[bj]+idx]; + // ... and offset inside the block + idxVal += (j - blockOuterIndex(bj)) * blockOuterSize(bj) + it_spmat.index() - m_innerOffset[bi]; + } + else + { + // All blocks before + idxVal = (m_outerIndex[bj] + idx) * m_blockSize * m_blockSize; + // inside the block + idxVal += (j - blockOuterIndex(bj)) * m_blockSize + (it_spmat.index()%m_blockSize); + } + // Insert the value + m_values[idxVal] = it_spmat.value(); + } // end of this column + } // end of this block + } // end of this outer block + + return *this; + } + + /** + * \brief Set the nonzero block pattern of the matrix + * + * Given a sparse matrix describing the nonzero block pattern, + * this function prepares the internal pointers for values. + * After calling this function, any *nonzero* block (bi, bj) can be set + * with a simple call to coeffRef(bi,bj). + * + * + * \warning Before calling this function, tt is necessary to call + * either setBlockLayout() (matrices with variable-size blocks) + * or setBlockSize() (for fixed-size blocks). 
+ * + * \param blockPattern Sparse matrix of boolean elements describing the block structure + * + * \sa setBlockLayout() \sa setBlockSize() + */ + template + void setBlockStructure(const MatrixType& blockPattern) + { + resize(blockPattern.rows(), blockPattern.cols()); + reserve(blockPattern.nonZeros()); + + // Browse the block pattern and set up the various pointers + m_outerIndex[0] = 0; + if(m_blockSize == Dynamic) m_blockPtr[0] = 0; + for(StorageIndex nz = 0; nz < m_nonzeros; ++nz) m_values[nz] = Scalar(0); + for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) + { + //Browse each outer block + + //First, copy and save the indices of nonzero blocks + //FIXME : find a way to avoid this ... + std::vector nzBlockIdx; + typename MatrixType::InnerIterator it(blockPattern, bj); + for(; it; ++it) + { + nzBlockIdx.push_back(it.index()); + } + std::sort(nzBlockIdx.begin(), nzBlockIdx.end()); + + // Now, fill block indices and (eventually) pointers to blocks + for(StorageIndex idx = 0; idx < nzBlockIdx.size(); ++idx) + { + StorageIndex offset = m_outerIndex[bj]+idx; // offset in m_indices + m_indices[offset] = nzBlockIdx[idx]; + if(m_blockSize == Dynamic) + m_blockPtr[offset] = m_blockPtr[offset-1] + blockInnerSize(nzBlockIdx[idx]) * blockOuterSize(bj); + // There is no blockPtr for fixed-size blocks... not needed !??? + } + // Save the pointer to the next outer block + m_outerIndex[bj+1] = m_outerIndex[bj] + nzBlockIdx.size(); + } + } + + /** + * \brief Set the number of rows and columns blocks + */ + inline void resize(Index brow, Index bcol) + { + m_innerBSize = IsColMajor ? brow : bcol; + m_outerBSize = IsColMajor ? bcol : brow; + } + + /** + * \brief set the block size at runtime for fixed-size block layout + * + * Call this only for fixed-size blocks + */ + inline void setBlockSize(Index blockSize) + { + m_blockSize = blockSize; + } + + /** + * \brief Set the row and column block layouts, + * + * This function set the size of each row and column block. + * So this function should be used only for blocks with variable size. + * \param rowBlocks : Number of rows per row block + * \param colBlocks : Number of columns per column block + * \sa resize(), setBlockSize() + */ + inline void setBlockLayout(const VectorXi& rowBlocks, const VectorXi& colBlocks) + { + const VectorXi& innerBlocks = IsColMajor ? rowBlocks : colBlocks; + const VectorXi& outerBlocks = IsColMajor ? colBlocks : rowBlocks; + eigen_assert(m_innerBSize == innerBlocks.size() && "CHECK THE NUMBER OF ROW OR COLUMN BLOCKS"); + eigen_assert(m_outerBSize == outerBlocks.size() && "CHECK THE NUMBER OF ROW OR COLUMN BLOCKS"); + m_outerBSize = outerBlocks.size(); + // starting index of blocks... cumulative sums + m_innerOffset = new StorageIndex[m_innerBSize+1]; + m_outerOffset = new StorageIndex[m_outerBSize+1]; + m_innerOffset[0] = 0; + m_outerOffset[0] = 0; + std::partial_sum(&innerBlocks[0], &innerBlocks[m_innerBSize-1]+1, &m_innerOffset[1]); + std::partial_sum(&outerBlocks[0], &outerBlocks[m_outerBSize-1]+1, &m_outerOffset[1]); + + // Compute the total number of nonzeros + m_nonzeros = 0; + for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) + for(StorageIndex bi = 0; bi < m_innerBSize; ++bi) + m_nonzeros += outerBlocks[bj] * innerBlocks[bi]; + + } + + /** + * \brief Allocate the internal array of pointers to blocks and their inner indices + * + * \note For fixed-size blocks, call setBlockSize() to set the block. 
+ * And For variable-size blocks, call setBlockLayout() before using this function + * + * \param nonzerosblocks Number of nonzero blocks. The total number of nonzeros is + * is computed in setBlockLayout() for variable-size blocks + * \sa setBlockSize() + */ + inline void reserve(const Index nonzerosblocks) + { + eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) && + "TRYING TO RESERVE ZERO-SIZE MATRICES, CALL resize() first"); + + //FIXME Should free if already allocated + m_outerIndex = new StorageIndex[m_outerBSize+1]; + + m_nonzerosblocks = nonzerosblocks; + if(m_blockSize != Dynamic) + { + m_nonzeros = nonzerosblocks * (m_blockSize * m_blockSize); + m_blockPtr = 0; + } + else + { + // m_nonzeros is already computed in setBlockLayout() + m_blockPtr = new StorageIndex[m_nonzerosblocks+1]; + } + m_indices = new StorageIndex[m_nonzerosblocks+1]; + m_values = new Scalar[m_nonzeros]; + } + + + /** + * \brief Fill values in a matrix from a triplet list. + * + * Each triplet item has a block stored in an Eigen dense matrix. + * The InputIterator class should provide the functions row(), col() and value() + * + * \note For fixed-size blocks, call setBlockSize() before this function. + * + * FIXME Do not accept duplicates + */ + template + void setFromTriplets(const InputIterator& begin, const InputIterator& end) + { + eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before"); + + /* First, sort the triplet list + * FIXME This can be unnecessarily expensive since only the inner indices have to be sorted + * The best approach is like in SparseMatrix::setFromTriplets() + */ + internal::TripletComp tripletcomp; + std::sort(begin, end, tripletcomp); + + /* Count the number of rows and column blocks, + * and the number of nonzero blocks per outer dimension + */ + VectorXi rowBlocks(m_innerBSize); // Size of each block row + VectorXi colBlocks(m_outerBSize); // Size of each block column + rowBlocks.setZero(); colBlocks.setZero(); + VectorXi nzblock_outer(m_outerBSize); // Number of nz blocks per outer vector + VectorXi nz_outer(m_outerBSize); // Number of nz per outer vector...for variable-size blocks + nzblock_outer.setZero(); + nz_outer.setZero(); + for(InputIterator it(begin); it !=end; ++it) + { + eigen_assert(it->row() >= 0 && it->row() < this->blockRows() && it->col() >= 0 && it->col() < this->blockCols()); + eigen_assert((it->value().rows() == it->value().cols() && (it->value().rows() == m_blockSize)) + || (m_blockSize == Dynamic)); + + if(m_blockSize == Dynamic) + { + eigen_assert((rowBlocks[it->row()] == 0 || rowBlocks[it->row()] == it->value().rows()) && + "NON CORRESPONDING SIZES FOR ROW BLOCKS"); + eigen_assert((colBlocks[it->col()] == 0 || colBlocks[it->col()] == it->value().cols()) && + "NON CORRESPONDING SIZES FOR COLUMN BLOCKS"); + rowBlocks[it->row()] =it->value().rows(); + colBlocks[it->col()] = it->value().cols(); + } + nz_outer(IsColMajor ? it->col() : it->row()) += it->value().rows() * it->value().cols(); + nzblock_outer(IsColMajor ? 
it->col() : it->row())++; + } + // Allocate member arrays + if(m_blockSize == Dynamic) setBlockLayout(rowBlocks, colBlocks); + StorageIndex nzblocks = nzblock_outer.sum(); + reserve(nzblocks); + + // Temporary markers + VectorXi block_id(m_outerBSize); // To be used as a block marker during insertion + + // Setup outer index pointers and markers + m_outerIndex[0] = 0; + if (m_blockSize == Dynamic) m_blockPtr[0] = 0; + for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) + { + m_outerIndex[bj+1] = m_outerIndex[bj] + nzblock_outer(bj); + block_id(bj) = m_outerIndex[bj]; + if(m_blockSize==Dynamic) + { + m_blockPtr[m_outerIndex[bj+1]] = m_blockPtr[m_outerIndex[bj]] + nz_outer(bj); + } + } + + // Fill the matrix + for(InputIterator it(begin); it!=end; ++it) + { + StorageIndex outer = IsColMajor ? it->col() : it->row(); + StorageIndex inner = IsColMajor ? it->row() : it->col(); + m_indices[block_id(outer)] = inner; + StorageIndex block_size = it->value().rows()*it->value().cols(); + StorageIndex nz_marker = blockPtr(block_id[outer]); + memcpy(&(m_values[nz_marker]), it->value().data(), block_size * sizeof(Scalar)); + if(m_blockSize == Dynamic) + { + m_blockPtr[block_id(outer)+1] = m_blockPtr[block_id(outer)] + block_size; + } + block_id(outer)++; + } + + // An alternative when the outer indices are sorted...no need to use an array of markers +// for(Index bcol = 0; bcol < m_outerBSize; ++bcol) +// { +// Index id = 0, id_nz = 0, id_nzblock = 0; +// for(InputIterator it(begin); it!=end; ++it) +// { +// while (idvalue().rows()*it->value().cols(); +// m_blockPtr[id_nzblock+1] = m_blockPtr[id_nzblock] + block_size; +// id_nzblock++; +// memcpy(&(m_values[id_nz]),it->value().data(), block_size*sizeof(Scalar)); +// id_nz += block_size; +// } +// while(id < m_outerBSize-1) // Empty columns at the end +// { +// id++; +// m_outerIndex[id+1]=m_outerIndex[id]; +// } +// } + } + + + /** + * \returns the number of rows + */ + inline Index rows() const + { +// return blockRows(); + return (IsColMajor ? innerSize() : outerSize()); + } + + /** + * \returns the number of cols + */ + inline Index cols() const + { +// return blockCols(); + return (IsColMajor ? outerSize() : innerSize()); + } + + inline Index innerSize() const + { + if(m_blockSize == Dynamic) return m_innerOffset[m_innerBSize]; + else return (m_innerBSize * m_blockSize) ; + } + + inline Index outerSize() const + { + if(m_blockSize == Dynamic) return m_outerOffset[m_outerBSize]; + else return (m_outerBSize * m_blockSize) ; + } + /** \returns the number of rows grouped by blocks */ + inline Index blockRows() const + { + return (IsColMajor ? m_innerBSize : m_outerBSize); + } + /** \returns the number of columns grouped by blocks */ + inline Index blockCols() const + { + return (IsColMajor ? 
m_outerBSize : m_innerBSize); + } + + inline Index outerBlocks() const { return m_outerBSize; } + inline Index innerBlocks() const { return m_innerBSize; } + + /** \returns the block index where outer belongs to */ + inline Index outerToBlock(Index outer) const + { + eigen_assert(outer < outerSize() && "OUTER INDEX OUT OF BOUNDS"); + + if(m_blockSize != Dynamic) + return (outer / m_blockSize); // Integer division + + StorageIndex b_outer = 0; + while(m_outerOffset[b_outer] <= outer) ++b_outer; + return b_outer - 1; + } + /** \returns the block index where inner belongs to */ + inline Index innerToBlock(Index inner) const + { + eigen_assert(inner < innerSize() && "OUTER INDEX OUT OF BOUNDS"); + + if(m_blockSize != Dynamic) + return (inner / m_blockSize); // Integer division + + StorageIndex b_inner = 0; + while(m_innerOffset[b_inner] <= inner) ++b_inner; + return b_inner - 1; + } + + /** + *\returns a reference to the (i,j) block as an Eigen Dense Matrix + */ + Ref coeffRef(Index brow, Index bcol) + { + eigen_assert(brow < blockRows() && "BLOCK ROW INDEX OUT OF BOUNDS"); + eigen_assert(bcol < blockCols() && "BLOCK nzblocksFlagCOLUMN OUT OF BOUNDS"); + + StorageIndex rsize = IsColMajor ? blockInnerSize(brow): blockOuterSize(bcol); + StorageIndex csize = IsColMajor ? blockOuterSize(bcol) : blockInnerSize(brow); + StorageIndex inner = IsColMajor ? brow : bcol; + StorageIndex outer = IsColMajor ? bcol : brow; + StorageIndex offset = m_outerIndex[outer]; + while(offset < m_outerIndex[outer+1] && m_indices[offset] != inner) + offset++; + if(m_indices[offset] == inner) + { + return Map(&(m_values[blockPtr(offset)]), rsize, csize); + } + else + { + //FIXME the block does not exist, Insert it !!!!!!!!! + eigen_assert("DYNAMIC INSERTION IS NOT YET SUPPORTED"); + } + } + + /** + * \returns the value of the (i,j) block as an Eigen Dense Matrix + */ + Map coeff(Index brow, Index bcol) const + { + eigen_assert(brow < blockRows() && "BLOCK ROW INDEX OUT OF BOUNDS"); + eigen_assert(bcol < blockCols() && "BLOCK COLUMN OUT OF BOUNDS"); + + StorageIndex rsize = IsColMajor ? blockInnerSize(brow): blockOuterSize(bcol); + StorageIndex csize = IsColMajor ? blockOuterSize(bcol) : blockInnerSize(brow); + StorageIndex inner = IsColMajor ? brow : bcol; + StorageIndex outer = IsColMajor ? 
bcol : brow; + StorageIndex offset = m_outerIndex[outer]; + while(offset < m_outerIndex[outer+1] && m_indices[offset] != inner) offset++; + if(m_indices[offset] == inner) + { + return Map (&(m_values[blockPtr(offset)]), rsize, csize); + } + else +// return BlockScalar::Zero(rsize, csize); + eigen_assert("NOT YET SUPPORTED"); + } + + // Block Matrix times vector product + template + BlockSparseTimeDenseProduct operator*(const VecType& lhs) const + { + return BlockSparseTimeDenseProduct(*this, lhs); + } + + /** \returns the number of nonzero blocks */ + inline Index nonZerosBlocks() const { return m_nonzerosblocks; } + /** \returns the total number of nonzero elements, including eventual explicit zeros in blocks */ + inline Index nonZeros() const { return m_nonzeros; } + + inline BlockScalarReturnType *valuePtr() {return static_cast(m_values);} +// inline Scalar *valuePtr(){ return m_values; } + inline StorageIndex *innerIndexPtr() {return m_indices; } + inline const StorageIndex *innerIndexPtr() const {return m_indices; } + inline StorageIndex *outerIndexPtr() {return m_outerIndex; } + inline const StorageIndex* outerIndexPtr() const {return m_outerIndex; } + + /** \brief for compatibility purposes with the SparseMatrix class */ + inline bool isCompressed() const {return true;} + /** + * \returns the starting index of the bi row block + */ + inline Index blockRowsIndex(Index bi) const + { + return IsColMajor ? blockInnerIndex(bi) : blockOuterIndex(bi); + } + + /** + * \returns the starting index of the bj col block + */ + inline Index blockColsIndex(Index bj) const + { + return IsColMajor ? blockOuterIndex(bj) : blockInnerIndex(bj); + } + + inline Index blockOuterIndex(Index bj) const + { + return (m_blockSize == Dynamic) ? m_outerOffset[bj] : (bj * m_blockSize); + } + inline Index blockInnerIndex(Index bi) const + { + return (m_blockSize == Dynamic) ? m_innerOffset[bi] : (bi * m_blockSize); + } + + // Not needed ??? + inline Index blockInnerSize(Index bi) const + { + return (m_blockSize == Dynamic) ? (m_innerOffset[bi+1] - m_innerOffset[bi]) : m_blockSize; + } + inline Index blockOuterSize(Index bj) const + { + return (m_blockSize == Dynamic) ? (m_outerOffset[bj+1]- m_outerOffset[bj]) : m_blockSize; + } + + /** + * \brief Browse the matrix by outer index + */ + class InnerIterator; // Browse column by column + + /** + * \brief Browse the matrix by block outer index + */ + class BlockInnerIterator; // Browse block by block + + friend std::ostream & operator << (std::ostream & s, const BlockSparseMatrix& m) + { + for (StorageIndex j = 0; j < m.outerBlocks(); ++j) + { + BlockInnerIterator itb(m, j); + for(; itb; ++itb) + { + s << "("<::type()); + } + + + protected: +// inline Index blockDynIdx(Index id, internal::true_type) const +// { +// return m_blockPtr[id]; +// } +// inline Index blockDynIdx(Index id, internal::false_type) const +// { +// return id * BlockSize * BlockSize; +// } + + // To be implemented + // Insert a block at a particular location... 
need to make a room for that + Map insert(Index brow, Index bcol); + + Index m_innerBSize; // Number of block rows + Index m_outerBSize; // Number of block columns + StorageIndex *m_innerOffset; // Starting index of each inner block (size m_innerBSize+1) + StorageIndex *m_outerOffset; // Starting index of each outer block (size m_outerBSize+1) + Index m_nonzerosblocks; // Total nonzeros blocks (lower than m_innerBSize x m_outerBSize) + Index m_nonzeros; // Total nonzeros elements + Scalar *m_values; //Values stored block column after block column (size m_nonzeros) + StorageIndex *m_blockPtr; // Pointer to the beginning of each block in m_values, size m_nonzeroblocks ... null for fixed-size blocks + StorageIndex *m_indices; //Inner block indices, size m_nonzerosblocks ... OK + StorageIndex *m_outerIndex; // Starting pointer of each block column in m_indices (size m_outerBSize)... OK + Index m_blockSize; // Size of a block for fixed-size blocks, otherwise -1 +}; + +template +class BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, _StorageIndex>::BlockInnerIterator +{ + public: + + enum{ + Flags = _Options + }; + + BlockInnerIterator(const BlockSparseMatrix& mat, const Index outer) + : m_mat(mat),m_outer(outer), + m_id(mat.m_outerIndex[outer]), + m_end(mat.m_outerIndex[outer+1]) + { + } + + inline BlockInnerIterator& operator++() {m_id++; return *this; } + + inline const Map value() const + { + return Map(&(m_mat.m_values[m_mat.blockPtr(m_id)]), + rows(),cols()); + } + inline Map valueRef() + { + return Map(&(m_mat.m_values[m_mat.blockPtr(m_id)]), + rows(),cols()); + } + // Block inner index + inline Index index() const {return m_mat.m_indices[m_id]; } + inline Index outer() const { return m_outer; } + // block row index + inline Index row() const {return index(); } + // block column index + inline Index col() const {return outer(); } + // FIXME Number of rows in the current block + inline Index rows() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_innerOffset[index()+1] - m_mat.m_innerOffset[index()]) : m_mat.m_blockSize; } + // Number of columns in the current block ... + inline Index cols() const { return (m_mat.m_blockSize==Dynamic) ? 
(m_mat.m_outerOffset[m_outer+1]-m_mat.m_outerOffset[m_outer]) : m_mat.m_blockSize;} + inline operator bool() const { return (m_id < m_end); } + + protected: + const BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, StorageIndex>& m_mat; + const Index m_outer; + Index m_id; + Index m_end; +}; + +template +class BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, _StorageIndex>::InnerIterator +{ + public: + InnerIterator(const BlockSparseMatrix& mat, Index outer) + : m_mat(mat),m_outerB(mat.outerToBlock(outer)),m_outer(outer), + itb(mat, mat.outerToBlock(outer)), + m_offset(outer - mat.blockOuterIndex(m_outerB)) + { + if (itb) + { + m_id = m_mat.blockInnerIndex(itb.index()); + m_start = m_id; + m_end = m_mat.blockInnerIndex(itb.index()+1); + } + } + inline InnerIterator& operator++() + { + m_id++; + if (m_id >= m_end) + { + ++itb; + if (itb) + { + m_id = m_mat.blockInnerIndex(itb.index()); + m_start = m_id; + m_end = m_mat.blockInnerIndex(itb.index()+1); + } + } + return *this; + } + inline const Scalar& value() const + { + return itb.value().coeff(m_id - m_start, m_offset); + } + inline Scalar& valueRef() + { + return itb.valueRef().coeff(m_id - m_start, m_offset); + } + inline Index index() const { return m_id; } + inline Index outer() const {return m_outer; } + inline Index col() const {return outer(); } + inline Index row() const { return index();} + inline operator bool() const + { + return itb; + } + protected: + const BlockSparseMatrix& m_mat; + const Index m_outer; + const Index m_outerB; + BlockInnerIterator itb; // Iterator through the blocks + const Index m_offset; // Position of this column in the block + Index m_start; // starting inner index of this block + Index m_id; // current inner index in the block + Index m_end; // starting inner index of the next block + +}; +} // end namespace Eigen + +#endif // EIGEN_SPARSEBLOCKMATRIX_H diff --git a/external/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/external/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h new file mode 100644 index 0000000..42c99e4 --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h @@ -0,0 +1,404 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DYNAMIC_SPARSEMATRIX_H +#define EIGEN_DYNAMIC_SPARSEMATRIX_H + +namespace Eigen { + +/** \deprecated use a SparseMatrix in an uncompressed mode + * + * \class DynamicSparseMatrix + * + * \brief A sparse matrix class designed for matrix assembly purpose + * + * \param _Scalar the scalar type, i.e. the type of the coefficients + * + * Unlike SparseMatrix, this class provides a much higher degree of flexibility. In particular, it allows + * random read/write accesses in log(rho*outer_size) where \c rho is the probability that a coefficient is + * nonzero and outer_size is the number of columns if the matrix is column-major and the number of rows + * otherwise. + * + * Internally, the data are stored as a std::vector of compressed vector. The performances of random writes might + * decrease as the number of nonzeros per inner-vector increase. In practice, we observed very good performance + * till about 100 nonzeros/vector, and the performance remains relatively good till 500 nonzeros/vectors. 
+ * + * \see SparseMatrix + */ + +namespace internal { +template +struct traits > +{ + typedef _Scalar Scalar; + typedef _StorageIndex StorageIndex; + typedef Sparse StorageKind; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic, + MaxRowsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic, + Flags = _Options | NestByRefBit | LvalueBit, + CoeffReadCost = NumTraits::ReadCost, + SupportedAccessPatterns = OuterRandomAccessPattern + }; +}; +} + +template + class DynamicSparseMatrix + : public SparseMatrixBase > +{ + typedef SparseMatrixBase Base; + using Base::convert_index; + public: + EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix) + // FIXME: why are these operator already alvailable ??? + // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, +=) + // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, -=) + typedef MappedSparseMatrix Map; + using Base::IsRowMajor; + using Base::operator=; + enum { + Options = _Options + }; + + protected: + + typedef DynamicSparseMatrix TransposedSparseMatrix; + + Index m_innerSize; + std::vector > m_data; + + public: + + inline Index rows() const { return IsRowMajor ? outerSize() : m_innerSize; } + inline Index cols() const { return IsRowMajor ? m_innerSize : outerSize(); } + inline Index innerSize() const { return m_innerSize; } + inline Index outerSize() const { return convert_index(m_data.size()); } + inline Index innerNonZeros(Index j) const { return m_data[j].size(); } + + std::vector >& _data() { return m_data; } + const std::vector >& _data() const { return m_data; } + + /** \returns the coefficient value at given position \a row, \a col + * This operation involes a log(rho*outer_size) binary search. + */ + inline Scalar coeff(Index row, Index col) const + { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + return m_data[outer].at(inner); + } + + /** \returns a reference to the coefficient value at given position \a row, \a col + * This operation involes a log(rho*outer_size) binary search. If the coefficient does not + * exist yet, then a sorted insertion into a sequential buffer is performed. + */ + inline Scalar& coeffRef(Index row, Index col) + { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? 
col : row; + return m_data[outer].atWithInsertion(inner); + } + + class InnerIterator; + class ReverseInnerIterator; + + void setZero() + { + for (Index j=0; j0) + { + Index reserveSizePerVector = (std::max)(reserveSize/outerSize(),Index(4)); + for (Index j=0; j(m_data[outer].size()) - 1; + m_data[outer].resize(id+2,1); + + while ( (id >= startId) && (m_data[outer].index(id) > inner) ) + { + m_data[outer].index(id+1) = m_data[outer].index(id); + m_data[outer].value(id+1) = m_data[outer].value(id); + --id; + } + m_data[outer].index(id+1) = inner; + m_data[outer].value(id+1) = 0; + return m_data[outer].value(id+1); + } + + /** Does nothing: provided for compatibility with SparseMatrix */ + inline void finalize() {} + + /** Suppress all nonzeros which are smaller than \a reference under the tolerance \a epsilon */ + void prune(Scalar reference, RealScalar epsilon = NumTraits::dummy_precision()) + { + for (Index j=0; jinnerSize) + { + // remove all coefficients with innerCoord>=innerSize + // TODO + //std::cerr << "not implemented yet\n"; + exit(2); + } + if (m_data.size() != outerSize) + { + m_data.resize(outerSize); + } + } + + /** The class DynamicSparseMatrix is deprecated */ + EIGEN_DEPRECATED inline DynamicSparseMatrix() + : m_innerSize(0), m_data(0) + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + eigen_assert(innerSize()==0 && outerSize()==0); + } + + /** The class DynamicSparseMatrix is deprecated */ + EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols) + : m_innerSize(0) + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + resize(rows, cols); + } + + /** The class DynamicSparseMatrix is deprecated */ + template + EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase& other) + : m_innerSize(0) + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + Base::operator=(other.derived()); + } + + inline DynamicSparseMatrix(const DynamicSparseMatrix& other) + : Base(), m_innerSize(0) + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + *this = other.derived(); + } + + inline void swap(DynamicSparseMatrix& other) + { + //EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: swap\n"); + std::swap(m_innerSize, other.m_innerSize); + //std::swap(m_outerSize, other.m_outerSize); + m_data.swap(other.m_data); + } + + inline DynamicSparseMatrix& operator=(const DynamicSparseMatrix& other) + { + if (other.isRValue()) + { + swap(other.const_cast_derived()); + } + else + { + resize(other.rows(), other.cols()); + m_data = other.m_data; + } + return *this; + } + + /** Destructor */ + inline ~DynamicSparseMatrix() {} + + public: + + /** \deprecated + * Set the matrix to zero and reserve the memory for \a reserveSize nonzero coefficients. */ + EIGEN_DEPRECATED void startFill(Index reserveSize = 1000) + { + setZero(); + reserve(reserveSize); + } + + /** \deprecated use insert() + * inserts a nonzero coefficient at given coordinates \a row, \a col and returns its reference assuming that: + * 1 - the coefficient does not exist yet + * 2 - this the coefficient with greater inner coordinate for the given outer coordinate. + * In other words, assuming \c *this is column-major, then there must not exists any nonzero coefficient of coordinates + * \c i \c x \a col such that \c i >= \a row. Otherwise the matrix is invalid. 
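[Editor's note — a minimal sketch of the random-access assembly pattern this (deprecated) class targets, using only members shown above; it assumes <unsupported/Eigen/SparseExtra> is included and that the final copy into a compressed SparseMatrix goes through the usual SparseMatrixBase conversion.]

    // Sketch: scattered assembly via coeffRef(), then conversion to a compressed matrix.
    Eigen::DynamicSparseMatrix<double> D(1000, 1000);
    D.reserve(8000);                      // roughly 8 nonzeros per inner vector
    D.coeffRef(3, 7)  += 1.0;             // log-time sorted insertion
    D.coeffRef(41, 7) += 2.5;
    D.coeffRef(3, 7)  += 0.5;             // existing entry: plain in-place update
    Eigen::SparseMatrix<double> A(D);     // assumed convertible via SparseMatrixBase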
+ * + * \see fillrand(), coeffRef() + */ + EIGEN_DEPRECATED Scalar& fill(Index row, Index col) + { + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + return insertBack(outer,inner); + } + + /** \deprecated use insert() + * Like fill() but with random inner coordinates. + * Compared to the generic coeffRef(), the unique limitation is that we assume + * the coefficient does not exist yet. + */ + EIGEN_DEPRECATED Scalar& fillrand(Index row, Index col) + { + return insert(row,col); + } + + /** \deprecated use finalize() + * Does nothing. Provided for compatibility with SparseMatrix. */ + EIGEN_DEPRECATED void endFill() {} + +# ifdef EIGEN_DYNAMICSPARSEMATRIX_PLUGIN +# include EIGEN_DYNAMICSPARSEMATRIX_PLUGIN +# endif + }; + +template +class DynamicSparseMatrix::InnerIterator : public SparseVector::InnerIterator +{ + typedef typename SparseVector::InnerIterator Base; + public: + InnerIterator(const DynamicSparseMatrix& mat, Index outer) + : Base(mat.m_data[outer]), m_outer(outer) + {} + + inline Index row() const { return IsRowMajor ? m_outer : Base::index(); } + inline Index col() const { return IsRowMajor ? Base::index() : m_outer; } + inline Index outer() const { return m_outer; } + + protected: + const Index m_outer; +}; + +template +class DynamicSparseMatrix::ReverseInnerIterator : public SparseVector::ReverseInnerIterator +{ + typedef typename SparseVector::ReverseInnerIterator Base; + public: + ReverseInnerIterator(const DynamicSparseMatrix& mat, Index outer) + : Base(mat.m_data[outer]), m_outer(outer) + {} + + inline Index row() const { return IsRowMajor ? m_outer : Base::index(); } + inline Index col() const { return IsRowMajor ? Base::index() : m_outer; } + inline Index outer() const { return m_outer; } + + protected: + const Index m_outer; +}; + +namespace internal { + +template +struct evaluator > + : evaluator_base > +{ + typedef _Scalar Scalar; + typedef DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType; + typedef typename SparseMatrixType::InnerIterator InnerIterator; + typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator; + + enum { + CoeffReadCost = NumTraits<_Scalar>::ReadCost, + Flags = SparseMatrixType::Flags + }; + + evaluator() : m_matrix(0) {} + evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {} + + operator SparseMatrixType&() { return m_matrix->const_cast_derived(); } + operator const SparseMatrixType&() const { return *m_matrix; } + + Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); } + + Index nonZerosEstimate() const { return m_matrix->nonZeros(); } + + const SparseMatrixType *m_matrix; +}; + +} + +} // end namespace Eigen + +#endif // EIGEN_DYNAMIC_SPARSEMATRIX_H diff --git a/external/unsupported/Eigen/src/SparseExtra/MarketIO.h b/external/unsupported/Eigen/src/SparseExtra/MarketIO.h new file mode 100644 index 0000000..dd786d5 --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/MarketIO.h @@ -0,0 +1,282 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2012 Desire NUENTSA WAKAM +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPARSE_MARKET_IO_H +#define EIGEN_SPARSE_MARKET_IO_H + +#include +#include + +namespace Eigen { + +namespace internal +{ + template + inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, Scalar& value) + { + std::stringstream sline(line); + sline >> i >> j >> value; + } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, float& value) + { std::sscanf(line, "%d %d %g", &i, &j, &value); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, double& value) + { std::sscanf(line, "%d %d %lg", &i, &j, &value); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex& value) + { std::sscanf(line, "%d %d %g %g", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex& value) + { std::sscanf(line, "%d %d %lg %lg", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); } + + template + inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, std::complex& value) + { + std::stringstream sline(line); + Scalar valR, valI; + sline >> i >> j >> valR >> valI; + value = std::complex(valR,valI); + } + + template + inline void GetVectorElt (const std::string& line, RealScalar& val) + { + std::istringstream newline(line); + newline >> val; + } + + template + inline void GetVectorElt (const std::string& line, std::complex& val) + { + RealScalar valR, valI; + std::istringstream newline(line); + newline >> valR >> valI; + val = std::complex(valR, valI); + } + + template + inline void putMarketHeader(std::string& header,int sym) + { + header= "%%MatrixMarket matrix coordinate "; + if(internal::is_same >::value || internal::is_same >::value) + { + header += " complex"; + if(sym == Symmetric) header += " symmetric"; + else if (sym == SelfAdjoint) header += " Hermitian"; + else header += " general"; + } + else + { + header += " real"; + if(sym == Symmetric) header += " symmetric"; + else header += " general"; + } + } + + template + inline void PutMatrixElt(Scalar value, StorageIndex row, StorageIndex col, std::ofstream& out) + { + out << row << " "<< col << " " << value << "\n"; + } + template + inline void PutMatrixElt(std::complex value, StorageIndex row, StorageIndex col, std::ofstream& out) + { + out << row << " " << col << " " << value.real() << " " << value.imag() << "\n"; + } + + + template + inline void putVectorElt(Scalar value, std::ofstream& out) + { + out << value << "\n"; + } + template + inline void putVectorElt(std::complex value, std::ofstream& out) + { + out << value.real() << " " << value.imag()<< "\n"; + } + +} // end namespace internal + +inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector) +{ + sym = 0; + iscomplex = false; + isvector = false; + std::ifstream in(filename.c_str(),std::ios::in); + if(!in) + return false; + + std::string line; + // The matrix header is always the first line in the file + std::getline(in, line); eigen_assert(in.good()); + + std::stringstream fmtline(line); + std::string substr[5]; + fmtline>> substr[0] >> substr[1] >> substr[2] >> substr[3] >> substr[4]; + if(substr[2].compare("array") == 0) isvector = true; + if(substr[3].compare("complex") == 0) iscomplex = true; + if(substr[4].compare("symmetric") == 0) sym = Symmetric; + else if (substr[4].compare("Hermitian") == 0) sym = SelfAdjoint; + + return true; +} + +template +bool loadMarket(SparseMatrixType& mat, const 
std::string& filename) +{ + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + std::ifstream input(filename.c_str(),std::ios::in); + if(!input) + return false; + + char rdbuffer[4096]; + input.rdbuf()->pubsetbuf(rdbuffer, 4096); + + const int maxBuffersize = 2048; + char buffer[maxBuffersize]; + + bool readsizes = false; + + typedef Triplet T; + std::vector elements; + + Index M(-1), N(-1), NNZ(-1); + Index count = 0; + while(input.getline(buffer, maxBuffersize)) + { + // skip comments + //NOTE An appropriate test should be done on the header to get the symmetry + if(buffer[0]=='%') + continue; + + if(!readsizes) + { + std::stringstream line(buffer); + line >> M >> N >> NNZ; + if(M > 0 && N > 0) + { + readsizes = true; + mat.resize(M,N); + mat.reserve(NNZ); + } + } + else + { + StorageIndex i(-1), j(-1); + Scalar value; + internal::GetMarketLine(buffer, i, j, value); + + i--; + j--; + if(i>=0 && j>=0 && i +bool loadMarketVector(VectorType& vec, const std::string& filename) +{ + typedef typename VectorType::Scalar Scalar; + std::ifstream in(filename.c_str(), std::ios::in); + if(!in) + return false; + + std::string line; + int n(0), col(0); + do + { // Skip comments + std::getline(in, line); eigen_assert(in.good()); + } while (line[0] == '%'); + std::istringstream newline(line); + newline >> n >> col; + eigen_assert(n>0 && col>0); + vec.resize(n); + int i = 0; + Scalar value; + while ( std::getline(in, line) && (i < n) ){ + internal::GetVectorElt(line, value); + vec(i++) = value; + } + in.close(); + if (i!=n){ + std::cerr<< "Unable to read all elements from file " << filename << "\n"; + return false; + } + return true; +} + +template +bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sym = 0) +{ + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::RealScalar RealScalar; + std::ofstream out(filename.c_str(),std::ios::out); + if(!out) + return false; + + out.flags(std::ios_base::scientific); + out.precision(std::numeric_limits::digits10 + 2); + std::string header; + internal::putMarketHeader(header, sym); + out << header << std::endl; + out << mat.rows() << " " << mat.cols() << " " << mat.nonZeros() << "\n"; + int count = 0; + for(int j=0; j +bool saveMarketVector (const VectorType& vec, const std::string& filename) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; + std::ofstream out(filename.c_str(),std::ios::out); + if(!out) + return false; + + out.flags(std::ios_base::scientific); + out.precision(std::numeric_limits::digits10 + 2); + if(internal::is_same >::value || internal::is_same >::value) + out << "%%MatrixMarket matrix array complex general\n"; + else + out << "%%MatrixMarket matrix array real general\n"; + out << vec.size() << " "<< 1 << "\n"; + for (int i=0; i < vec.size(); i++){ + internal::putVectorElt(vec(i), out); + } + out.close(); + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_SPARSE_MARKET_IO_H diff --git a/external/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h b/external/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h new file mode 100644 index 0000000..02916ea --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h @@ -0,0 +1,247 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Desire NUENTSA WAKAM +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BROWSE_MATRICES_H +#define EIGEN_BROWSE_MATRICES_H + +namespace Eigen { + +enum { + SPD = 0x100, + NonSymmetric = 0x0 +}; + +/** + * @brief Iterator to browse matrices from a specified folder + * + * This is used to load all the matrices from a folder. + * The matrices should be in Matrix Market format + * It is assumed that the matrices are named as matname.mtx + * and matname_SPD.mtx if the matrix is Symmetric and positive definite (or Hermitian) + * The right hand side vectors are loaded as well, if they exist. + * They should be named as matname_b.mtx. + * Note that the right hand side for a SPD matrix is named as matname_SPD_b.mtx + * + * Sometimes a reference solution is available. In this case, it should be named as matname_x.mtx + * + * Sample code + * \code + * + * \endcode + * + * \tparam Scalar The scalar type + */ +template +class MatrixMarketIterator +{ + typedef typename NumTraits::Real RealScalar; + public: + typedef Matrix VectorType; + typedef SparseMatrix MatrixType; + + public: + MatrixMarketIterator(const std::string &folder) + : m_sym(0), m_isvalid(false), m_matIsLoaded(false), m_hasRhs(false), m_hasrefX(false), m_folder(folder) + { + m_folder_id = opendir(folder.c_str()); + if(m_folder_id) + Getnextvalidmatrix(); + } + + ~MatrixMarketIterator() + { + if (m_folder_id) closedir(m_folder_id); + } + + inline MatrixMarketIterator& operator++() + { + m_matIsLoaded = false; + m_hasrefX = false; + m_hasRhs = false; + Getnextvalidmatrix(); + return *this; + } + inline operator bool() const { return m_isvalid;} + + /** Return the sparse matrix corresponding to the current file */ + inline MatrixType& matrix() + { + // Read the matrix + if (m_matIsLoaded) return m_mat; + + std::string matrix_file = m_folder + "/" + m_matname + ".mtx"; + if ( !loadMarket(m_mat, matrix_file)) + { + std::cerr << "Warning loadMarket failed when loading \"" << matrix_file << "\"" << std::endl; + m_matIsLoaded = false; + return m_mat; + } + m_matIsLoaded = true; + + if (m_sym != NonSymmetric) + { + // Check whether we need to restore a full matrix: + RealScalar diag_norm = m_mat.diagonal().norm(); + RealScalar lower_norm = m_mat.template triangularView().norm(); + RealScalar upper_norm = m_mat.template triangularView().norm(); + if(lower_norm>diag_norm && upper_norm==diag_norm) + { + // only the lower part is stored + MatrixType tmp(m_mat); + m_mat = tmp.template selfadjointView(); + } + else if(upper_norm>diag_norm && lower_norm==diag_norm) + { + // only the upper part is stored + MatrixType tmp(m_mat); + m_mat = tmp.template selfadjointView(); + } + } + return m_mat; + } + + /** Return the right hand side corresponding to the current matrix. 
+ * If the rhs file is not provided, a random rhs is generated + */ + inline VectorType& rhs() + { + // Get the right hand side + if (m_hasRhs) return m_rhs; + + std::string rhs_file; + rhs_file = m_folder + "/" + m_matname + "_b.mtx"; // The pattern is matname_b.mtx + m_hasRhs = Fileexists(rhs_file); + if (m_hasRhs) + { + m_rhs.resize(m_mat.cols()); + m_hasRhs = loadMarketVector(m_rhs, rhs_file); + } + if (!m_hasRhs) + { + // Generate a random right hand side + if (!m_matIsLoaded) this->matrix(); + m_refX.resize(m_mat.cols()); + m_refX.setRandom(); + m_rhs = m_mat * m_refX; + m_hasrefX = true; + m_hasRhs = true; + } + return m_rhs; + } + + /** Return a reference solution + * If it is not provided and if the right hand side is not available + * then refX is randomly generated such that A*refX = b + * where A and b are the matrix and the rhs. + * Note that when a rhs is provided, refX is not available + */ + inline VectorType& refX() + { + // Check if a reference solution is provided + if (m_hasrefX) return m_refX; + + std::string lhs_file; + lhs_file = m_folder + "/" + m_matname + "_x.mtx"; + m_hasrefX = Fileexists(lhs_file); + if (m_hasrefX) + { + m_refX.resize(m_mat.cols()); + m_hasrefX = loadMarketVector(m_refX, lhs_file); + } + else + m_refX.resize(0); + return m_refX; + } + + inline std::string& matname() { return m_matname; } + + inline int sym() { return m_sym; } + + bool hasRhs() {return m_hasRhs; } + bool hasrefX() {return m_hasrefX; } + bool isFolderValid() { return bool(m_folder_id); } + + protected: + + inline bool Fileexists(std::string file) + { + std::ifstream file_id(file.c_str()); + if (!file_id.good() ) + { + return false; + } + else + { + file_id.close(); + return true; + } + } + + void Getnextvalidmatrix( ) + { + m_isvalid = false; + // Here, we return with the next valid matrix in the folder + while ( (m_curs_id = readdir(m_folder_id)) != NULL) { + m_isvalid = false; + std::string curfile; + curfile = m_folder + "/" + m_curs_id->d_name; + // Discard if it is a folder + if (m_curs_id->d_type == DT_DIR) continue; //FIXME This may not be available on non BSD systems +// struct stat st_buf; +// stat (curfile.c_str(), &st_buf); +// if (S_ISDIR(st_buf.st_mode)) continue; + + // Determine from the header if it is a matrix or a right hand side + bool isvector,iscomplex=false; + if(!getMarketHeader(curfile,m_sym,iscomplex,isvector)) continue; + if(isvector) continue; + if (!iscomplex) + { + if(internal::is_same >::value || internal::is_same >::value) + continue; + } + if (iscomplex) + { + if(internal::is_same::value || internal::is_same::value) + continue; + } + + + // Get the matrix name + std::string filename = m_curs_id->d_name; + m_matname = filename.substr(0, filename.length()-4); + + // Find if the matrix is SPD + size_t found = m_matname.find("SPD"); + if( (found!=std::string::npos) && (m_sym != NonSymmetric) ) + m_sym = SPD; + + m_isvalid = true; + break; + } + } + int m_sym; // Symmetry of the matrix + MatrixType m_mat; // Current matrix + VectorType m_rhs; // Current vector + VectorType m_refX; // The reference solution, if exists + std::string m_matname; // Matrix Name + bool m_isvalid; + bool m_matIsLoaded; // Determine if the matrix has already been loaded from the file + bool m_hasRhs; // The right hand side exists + bool m_hasrefX; // A reference solution is provided + std::string m_folder; + DIR * m_folder_id; + struct dirent *m_curs_id; + +}; + +} // end namespace Eigen + +#endif diff --git a/external/unsupported/Eigen/src/SparseExtra/RandomSetter.h 
b/external/unsupported/Eigen/src/SparseExtra/RandomSetter.h new file mode 100644 index 0000000..985702b --- /dev/null +++ b/external/unsupported/Eigen/src/SparseExtra/RandomSetter.h @@ -0,0 +1,349 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANDOMSETTER_H +#define EIGEN_RANDOMSETTER_H + +#if defined(EIGEN_GOOGLEHASH_SUPPORT) +// Ensure the ::google namespace exists, required for checking existence of +// ::google::dense_hash_map and ::google::sparse_hash_map. +namespace google {} +#endif + +namespace Eigen { + +/** Represents a std::map + * + * \see RandomSetter + */ +template struct StdMapTraits +{ + typedef int KeyType; + typedef std::map Type; + enum { + IsSorted = 1 + }; + + static void setInvalidKey(Type&, const KeyType&) {} +}; + +#ifdef EIGEN_UNORDERED_MAP_SUPPORT +/** Represents a std::unordered_map + * + * To use it you need to both define EIGEN_UNORDERED_MAP_SUPPORT and include the unordered_map header file + * yourself making sure that unordered_map is defined in the std namespace. + * + * For instance, with current version of gcc you can either enable C++0x standard (-std=c++0x) or do: + * \code + * #include + * #define EIGEN_UNORDERED_MAP_SUPPORT + * namespace std { + * using std::tr1::unordered_map; + * } + * \endcode + * + * \see RandomSetter + */ +template struct StdUnorderedMapTraits +{ + typedef int KeyType; + typedef std::unordered_map Type; + enum { + IsSorted = 0 + }; + + static void setInvalidKey(Type&, const KeyType&) {} +}; +#endif // EIGEN_UNORDERED_MAP_SUPPORT + +#if defined(EIGEN_GOOGLEHASH_SUPPORT) + +namespace google { + +// Namespace work-around, since sometimes dense_hash_map and sparse_hash_map +// are in the global namespace, and other times they are under ::google. +using namespace ::google; + +template +struct DenseHashMap { + typedef dense_hash_map type; +}; + +template +struct SparseHashMap { + typedef sparse_hash_map type; +}; + +} // namespace google + +/** Represents a google::dense_hash_map + * + * \see RandomSetter + */ +template struct GoogleDenseHashMapTraits +{ + typedef int KeyType; + typedef typename google::DenseHashMap::type Type; + enum { + IsSorted = 0 + }; + + static void setInvalidKey(Type& map, const KeyType& k) + { map.set_empty_key(k); } +}; + +/** Represents a google::sparse_hash_map + * + * \see RandomSetter + */ +template struct GoogleSparseHashMapTraits +{ + typedef int KeyType; + typedef typename google::SparseHashMap::type Type; + enum { + IsSorted = 0 + }; + + static void setInvalidKey(Type&, const KeyType&) {} +}; +#endif + +/** \class RandomSetter + * + * \brief The RandomSetter is a wrapper object allowing to set/update a sparse matrix with random access + * + * \tparam SparseMatrixType the type of the sparse matrix we are updating + * \tparam MapTraits a traits class representing the map implementation used for the temporary sparse storage. + * Its default value depends on the system. + * \tparam OuterPacketBits defines the number of rows (or columns) manage by a single map object + * as a power of two exponent. + * + * This class temporarily represents a sparse matrix object using a generic map implementation allowing for + * efficient random access. 
The conversion from the compressed representation to a hash_map object is performed + * in the RandomSetter constructor, while the sparse matrix is updated back at destruction time. This strategy + * suggest the use of nested blocks as in this example: + * + * \code + * SparseMatrix m(rows,cols); + * { + * RandomSetter > w(m); + * // don't use m but w instead with read/write random access to the coefficients: + * for(;;) + * w(rand(),rand()) = rand; + * } + * // when w is deleted, the data are copied back to m + * // and m is ready to use. + * \endcode + * + * Since hash_map objects are not fully sorted, representing a full matrix as a single hash_map would + * involve a big and costly sort to update the compressed matrix back. To overcome this issue, a RandomSetter + * use multiple hash_map, each representing 2^OuterPacketBits columns or rows according to the storage order. + * To reach optimal performance, this value should be adjusted according to the average number of nonzeros + * per rows/columns. + * + * The possible values for the template parameter MapTraits are: + * - \b StdMapTraits: corresponds to std::map. (does not perform very well) + * - \b GnuHashMapTraits: corresponds to __gnu_cxx::hash_map (available only with GCC) + * - \b GoogleDenseHashMapTraits: corresponds to google::dense_hash_map (best efficiency, reasonable memory consumption) + * - \b GoogleSparseHashMapTraits: corresponds to google::sparse_hash_map (best memory consumption, relatively good performance) + * + * The default map implementation depends on the availability, and the preferred order is: + * GoogleSparseHashMapTraits, GnuHashMapTraits, and finally StdMapTraits. + * + * For performance and memory consumption reasons it is highly recommended to use one of + * Google's hash_map implementations. To enable the support for them, you must define + * EIGEN_GOOGLEHASH_SUPPORT. This will include both and + * for you. + * + * \see https://github.com/sparsehash/sparsehash + */ +template class MapTraits = +#if defined(EIGEN_GOOGLEHASH_SUPPORT) + GoogleDenseHashMapTraits +#elif defined(_HASH_MAP) + GnuHashMapTraits +#else + StdMapTraits +#endif + ,int OuterPacketBits = 6> +class RandomSetter +{ + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + + struct ScalarWrapper + { + ScalarWrapper() : value(0) {} + Scalar value; + }; + typedef typename MapTraits::KeyType KeyType; + typedef typename MapTraits::Type HashMapType; + static const int OuterPacketMask = (1 << OuterPacketBits) - 1; + enum { + SwapStorage = 1 - MapTraits::IsSorted, + TargetRowMajor = (SparseMatrixType::Flags & RowMajorBit) ? 1 : 0, + SetterRowMajor = SwapStorage ? 1-TargetRowMajor : TargetRowMajor + }; + + public: + + /** Constructs a random setter object from the sparse matrix \a target + * + * Note that the initial value of \a target are imported. If you want to re-set + * a sparse matrix from scratch, then you must set it to zero first using the + * setZero() function. + */ + inline RandomSetter(SparseMatrixType& target) + : mp_target(&target) + { + const Index outerSize = SwapStorage ? target.innerSize() : target.outerSize(); + const Index innerSize = SwapStorage ? 
target.outerSize() : target.innerSize(); + m_outerPackets = outerSize >> OuterPacketBits; + if (outerSize&OuterPacketMask) + m_outerPackets += 1; + m_hashmaps = new HashMapType[m_outerPackets]; + // compute number of bits needed to store inner indices + Index aux = innerSize - 1; + m_keyBitsOffset = 0; + while (aux) + { + ++m_keyBitsOffset; + aux = aux >> 1; + } + KeyType ik = (1<<(OuterPacketBits+m_keyBitsOffset)); + for (Index k=0; k::setInvalidKey(m_hashmaps[k],ik); + + // insert current coeffs + for (Index j=0; jouterSize(); ++j) + for (typename SparseMatrixType::InnerIterator it(*mp_target,j); it; ++it) + (*this)(TargetRowMajor?j:it.index(), TargetRowMajor?it.index():j) = it.value(); + } + + /** Destructor updating back the sparse matrix target */ + ~RandomSetter() + { + KeyType keyBitsMask = (1<setZero(); + mp_target->makeCompressed(); + mp_target->reserve(nonZeros()); + Index prevOuter = -1; + for (Index k=0; kfirst >> m_keyBitsOffset) + outerOffset; + const Index inner = it->first & keyBitsMask; + if (prevOuter!=outer) + { + for (Index j=prevOuter+1;j<=outer;++j) + mp_target->startVec(j); + prevOuter = outer; + } + mp_target->insertBackByOuterInner(outer, inner) = it->second.value; + } + } + mp_target->finalize(); + } + else + { + VectorXi positions(mp_target->outerSize()); + positions.setZero(); + // pass 1 + for (Index k=0; kfirst & keyBitsMask; + ++positions[outer]; + } + } + // prefix sum + StorageIndex count = 0; + for (Index j=0; jouterSize(); ++j) + { + StorageIndex tmp = positions[j]; + mp_target->outerIndexPtr()[j] = count; + positions[j] = count; + count += tmp; + } + mp_target->makeCompressed(); + mp_target->outerIndexPtr()[mp_target->outerSize()] = count; + mp_target->resizeNonZeros(count); + // pass 2 + for (Index k=0; kfirst >> m_keyBitsOffset) + outerOffset; + const Index outer = it->first & keyBitsMask; + // sorted insertion + // Note that we have to deal with at most 2^OuterPacketBits unsorted coefficients, + // moreover those 2^OuterPacketBits coeffs are likely to be sparse, an so only a + // small fraction of them have to be sorted, whence the following simple procedure: + Index posStart = mp_target->outerIndexPtr()[outer]; + Index i = (positions[outer]++) - 1; + while ( (i >= posStart) && (mp_target->innerIndexPtr()[i] > inner) ) + { + mp_target->valuePtr()[i+1] = mp_target->valuePtr()[i]; + mp_target->innerIndexPtr()[i+1] = mp_target->innerIndexPtr()[i]; + --i; + } + mp_target->innerIndexPtr()[i+1] = internal::convert_index(inner); + mp_target->valuePtr()[i+1] = it->second.value; + } + } + } + delete[] m_hashmaps; + } + + /** \returns a reference to the coefficient at given coordinates \a row, \a col */ + Scalar& operator() (Index row, Index col) + { + const Index outer = SetterRowMajor ? row : col; + const Index inner = SetterRowMajor ? 
col : row; + const Index outerMajor = outer >> OuterPacketBits; // index of the packet/map + const Index outerMinor = outer & OuterPacketMask; // index of the inner vector in the packet + const KeyType key = internal::convert_index((outerMinor<(m_hashmaps[k].size()); + return nz; + } + + + protected: + + HashMapType* m_hashmaps; + SparseMatrixType* mp_target; + Index m_outerPackets; + unsigned char m_keyBitsOffset; +}; + +} // end namespace Eigen + +#endif // EIGEN_RANDOMSETTER_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h new file mode 100644 index 0000000..41d2bf6 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h @@ -0,0 +1,286 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_BESSELFUNCTIONS_ARRAYAPI_H +#define EIGEN_BESSELFUNCTIONS_ARRAYAPI_H + +namespace Eigen { + +/** \returns an expression of the coefficient-wise i0(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the first kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_i0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0_op, const Derived> +bessel_i0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i0e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the first kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i0e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_i0e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0e_op, const Derived> +bessel_i0e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i1(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_i1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1_op, const Derived> +bessel_i1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i1e(\a x) to the given + * arrays. 
+ * + * It returns the exponentially scaled modified Bessel + * function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i1e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_i1e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1e_op, const Derived> +bessel_i1e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k0(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_k0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0_op, const Derived> +bessel_k0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k0e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k0e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_k0e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0e_op, const Derived> +bessel_k0e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k1(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_k1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1_op, const Derived> +bessel_k1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k1e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k1e(T) for + * any scalar type T to be supported. 
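[Editor's note — a minimal usage sketch for the coefficient-wise Bessel wrappers defined above, assuming the SpecialFunctions module header is included.]

    // Sketch: element-wise modified Bessel functions on a double array.
    Eigen::ArrayXd x   = Eigen::ArrayXd::LinSpaced(5, 0.5, 2.5);
    Eigen::ArrayXd i0  = Eigen::bessel_i0(x);    // I0(x)
    Eigen::ArrayXd k0e = Eigen::bessel_k0e(x);   // exp(x) * K0(x)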
+ * + * \sa ArrayBase::bessel_k1e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1e_op, const Derived> +bessel_k1e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise j0(\a x) to the given + * arrays. + * + * It returns the Bessel function of the first kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of j0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_j0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j0_op, const Derived> +bessel_j0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise y0(\a x) to the given + * arrays. + * + * It returns the Bessel function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of y0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_y0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y0_op, const Derived> +bessel_y0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise j1(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of j1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_j1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j1_op, const Derived> +bessel_j1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise y1(\a x) to the given + * arrays. + * + * It returns the Bessel function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of y1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::bessel_y1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y1_op, const Derived> +bessel_y1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y1_op, + const Derived>(x.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_ARRAYAPI_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h new file mode 100644 index 0000000..6049cc2 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h @@ -0,0 +1,68 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSELFUNCTIONS_BFLOAT16_H +#define EIGEN_BESSELFUNCTIONS_BFLOAT16_H + +namespace Eigen { +namespace numext { + +#if EIGEN_HAS_C99_MATH +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i0(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_i0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i0e(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_i0e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i1(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_i1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i1e(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_i1e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_j0(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_j0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_j1(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_j1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_y0(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_y0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_y1(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_y1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k0(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_k0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k0e(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_k0e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k1(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_k1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k1e(const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::bessel_k1e(static_cast(x))); +} +#endif + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_BFLOAT16_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h new file mode 100644 index 0000000..8606a9f --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h @@ -0,0 +1,357 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSELFUNCTIONS_FUNCTORS_H +#define EIGEN_BESSELFUNCTIONS_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \brief Template functor to compute the modified Bessel function of the first + * kind of order zero. 
+ * \sa class CwiseUnaryOp, Cwise::bessel_i0() + */ +template +struct scalar_bessel_i0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_i0; + return bessel_i0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_i0(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. We also add + // the cost of an additional exp over i0e. + Cost = 28 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the first kind of order zero + * \sa class CwiseUnaryOp, Cwise::bessel_i0e() + */ +template +struct scalar_bessel_i0e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_i0e; + return bessel_i0e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_i0e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. + Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the first + * kind of order one + * \sa class CwiseUnaryOp, Cwise::bessel_i1() + */ +template +struct scalar_bessel_i1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_i1; + return bessel_i1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_i1(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. We also add + // the cost of an additional exp over i1e. + Cost = 28 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the first kind of order zero + * \sa class CwiseUnaryOp, Cwise::bessel_i1e() + */ +template +struct scalar_bessel_i1e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_i1e; + return bessel_i1e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_i1e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. 
+ Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the second kind of + * order zero + * \sa class CwiseUnaryOp, Cwise::bessel_j0() + */ +template +struct scalar_bessel_j0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_j0; + return bessel_j0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_j0(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomial of order ~N=8 is computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine and rsqrt cost. + Cost = 63 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the second kind of + * order zero + * \sa class CwiseUnaryOp, Cwise::bessel_y0() + */ +template +struct scalar_bessel_y0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_y0; + return bessel_y0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_y0(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomial of order ~N=8 is computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine, rsqrt and j0 cost. + Cost = 126 * NumTraits::MulCost + 96 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the first kind of + * order one + * \sa class CwiseUnaryOp, Cwise::bessel_j1() + */ +template +struct scalar_bessel_j1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_j1; + return bessel_j1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_j1(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomial of order ~N=8 is computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine and rsqrt cost. + Cost = 63 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the second kind of + * order one + * \sa class CwiseUnaryOp, Cwise::bessel_j1e() + */ +template +struct scalar_bessel_y1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_y1; + return bessel_y1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_y1(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomial of order ~N=8 is computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine, rsqrt and j1 cost. 
+ Cost = 126 * NumTraits::MulCost + 96 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the second + * kind of order zero + * \sa class CwiseUnaryOp, Cwise::bessel_k0() + */ +template +struct scalar_bessel_k0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_k0; + return bessel_k0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_k0(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i0, a log, exp and prsqrt and sin and cos. + Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the second kind of order zero + * \sa class CwiseUnaryOp, Cwise::bessel_k0e() + */ +template +struct scalar_bessel_k0e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_k0e; + return bessel_k0e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_k0e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i0, a log, exp and prsqrt and sin and cos. + Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the + * second kind of order one + * \sa class CwiseUnaryOp, Cwise::bessel_k1() + */ +template +struct scalar_bessel_k1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_k1; + return bessel_k1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_k1(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i1, a log, exp and prsqrt and sin and cos. 
+ Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the second kind of order one + * \sa class CwiseUnaryOp, Cwise::bessel_k1e() + */ +template +struct scalar_bessel_k1e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::bessel_k1e; + return bessel_k1e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pbessel_k1e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i1, a log, exp and prsqrt and sin and cos. + Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_FUNCTORS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h new file mode 100644 index 0000000..8930d1a --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h @@ -0,0 +1,66 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSELFUNCTIONS_HALF_H +#define EIGEN_BESSELFUNCTIONS_HALF_H + +namespace Eigen { +namespace numext { + +#if EIGEN_HAS_C99_MATH +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_i0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i0e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_i0e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_i1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i1e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_i1e(static_cast(x))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_j0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_j0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_j1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_j1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_y0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_y0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_y1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_y1(static_cast(x))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_k0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k0e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_k0e(static_cast(x))); +} 
+template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_k1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k1e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::bessel_k1e(static_cast(x))); +} +#endif + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_HALF_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h new file mode 100644 index 0000000..24812be --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h @@ -0,0 +1,1959 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSEL_FUNCTIONS_H +#define EIGEN_BESSEL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +// Parts of this code are based on the Cephes Math Library. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier +// +// Permission has been kindly provided by the original author +// to incorporate the Cephes software into the Eigen codebase: +// +// From: Stephen Moshier +// To: Eugene Brevdo +// Subject: Re: Permission to wrap several cephes functions in Eigen +// +// Hello Eugene, +// +// Thank you for writing. +// +// If your licensing is similar to BSD, the formal way that has been +// handled is simply to add a statement to the effect that you are incorporating +// the Cephes software by permission of the author. +// +// Good luck with your project, +// Steve + + +/**************************************************************************** + * Implementation of Bessel function, based on Cephes * + ****************************************************************************/ + +template +struct bessel_i0e_retval { + typedef Scalar type; +}; + +template ::type> +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i0ef.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i0ef(); + * + * y = i0ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 100000 3.7e-7 7.0e-8 + * See i0f(). 
+ * + */ + + const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f, + -2.67079385394061173391E-7f, 1.11738753912010371815E-6f, + -4.41673835845875056359E-6f, 1.64484480707288970893E-5f, + -5.75419501008210370398E-5f, 1.88502885095841655729E-4f, + -5.76375574538582365885E-4f, 1.63947561694133579842E-3f, + -4.32430999505057594430E-3f, 1.05464603945949983183E-2f, + -2.37374148058994688156E-2f, 4.93052842396707084878E-2f, + -9.49010970480476444210E-2f, 1.71620901522208775349E-1f, + -3.04682672343198398683E-1f, 6.76795274409476084995E-1f}; + + const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f, + 2.04891858946906374183E-7f, 2.89137052083475648297E-6f, + 6.88975834691682398426E-5f, 3.36911647825569408990E-3f, + 8.04490411014108831608E-1f}; + T y = pabs(x); + T y_le_eight = internal::pchebevl::run( + pmadd(pset1(0.5f), y, pset1(-2.0f)), A); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0f), y), pset1(2.0f)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + return pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); + } +}; + +template +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i0e.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i0e(); + * + * y = i0e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 5.4e-16 1.2e-16 + * See i0(). 
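internal::pchebevl evaluates a Chebyshev series, and the branch above feeds it |x|/2 - 2 for |x| <= 8 and 32/|x| - 2 otherwise, dividing the large-argument result by sqrt(|x|). A scalar sketch of the same structure, assuming pchebevl follows the Clenshaw recurrence of Cephes' chbevl() (A and B stand for the coefficient tables listed above):

#include <cmath>
#include <cstddef>

// Clenshaw-style recurrence for a Chebyshev series, as in Cephes' chbevl().
double chbevl(double x, const double* coef, std::size_t n) {
  double b0 = coef[0], b1 = 0.0, b2 = 0.0;
  for (std::size_t i = 1; i < n; ++i) {
    b2 = b1;
    b1 = b0;
    b0 = x * b1 - b2 + coef[i];
  }
  return 0.5 * (b0 - b2);
}

// Scalar analogue of generic_i0e above: i0e(x) = exp(-|x|) * i0(x).
double i0e_scalar(double x, const double* A, std::size_t nA,
                  const double* B, std::size_t nB) {
  const double y = std::fabs(x);
  if (y <= 8.0)
    return chbevl(0.5 * y - 2.0, A, nA);                 // small-argument series
  return chbevl(32.0 / y - 2.0, B, nB) / std::sqrt(y);   // large-argument series
}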
+ * + */ + + const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + const double B[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + T y = pabs(x); + T y_le_eight = internal::pchebevl::run( + pmadd(pset1(0.5), y, pset1(-2.0)), A); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0), y), pset1(2.0)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + return pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); + } +}; + +template +struct bessel_i0e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_i0e::run(x); + } +}; + +template +struct bessel_i0_retval { + typedef Scalar type; +}; + +template ::type> +struct generic_i0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + return pmul( + pexp(pabs(x)), + generic_i0e::run(x)); + } +}; + +template +struct bessel_i0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_i0::run(x); + } +}; + +template +struct bessel_i1e_retval { + typedef Scalar type; +}; + +template ::type > +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i1ef.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i1ef(); + * + * y = i1ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.5e-6 1.5e-7 + * See i1(). 
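generic_i0 above only undoes the exponential scaling, i.e. i0(x) = exp(|x|) * i0e(x). A quick cross-check against the C++17 special math functions, assuming the numext::bessel_* scalar wrappers provided by this module and a standard library that ships std::cyl_bessel_i:

#include <cmath>
#include <cstdio>

#include <unsupported/Eigen/SpecialFunctions>

int main() {
  for (double x : {0.25, 1.0, 4.0, 12.0}) {
    const double i0e = Eigen::numext::bessel_i0e(x);
    const double i0  = Eigen::numext::bessel_i0(x);
    // i0(x) = exp(|x|) * i0e(x); std::cyl_bessel_i(0, x) is the C++17 reference.
    std::printf("x=%5.2f  exp(x)*i0e=%.12g  i0=%.12g  std=%.12g\n",
                x, std::exp(std::fabs(x)) * i0e, i0, std::cyl_bessel_i(0.0, x));
  }
}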
+ * + */ + const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + + const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + + T y = pabs(x); + T y_le_eight = pmul(y, internal::pchebevl::run( + pmadd(pset1(0.5f), y, pset1(-2.0f)), A)); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0f), y), + pset1(2.0f)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + y = pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); + return pselect(pcmp_lt(x, pset1(0.0f)), pnegate(y), y); + } +}; + +template +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i1e.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i1e(); + * + * y = i1e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 2.0e-15 2.0e-16 + * See i1(). 
+ * + */ + const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + const double B[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + T y = pabs(x); + T y_le_eight = pmul(y, internal::pchebevl::run( + pmadd(pset1(0.5), y, pset1(-2.0)), A)); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0), y), + pset1(2.0)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + y = pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); + return pselect(pcmp_lt(x, pset1(0.0)), pnegate(y), y); + } +}; + +template +struct bessel_i1e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_i1e::run(x); + } +}; + +template +struct bessel_i1_retval { + typedef T type; +}; + +template ::type> +struct generic_i1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + return pmul( + pexp(pabs(x)), + generic_i1e::run(x)); + } +}; + +template +struct bessel_i1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_i1::run(x); + } +}; + +template +struct bessel_k0e_retval { + typedef T type; +}; + +template ::type> +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0ef.c + * Modified Bessel function, third kind, order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, k0ef(); + * + * y = k0ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 8.1e-7 7.8e-8 + * See k0(). 
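The same pattern holds one order up: i1(x) = exp(|x|) * i1e(x), and because i1 is odd the code above works on |x| and restores the sign with the final pselect. A brief sanity check under the same assumptions as the i0 check:

#include <cassert>
#include <cmath>

#include <unsupported/Eigen/SpecialFunctions>

int main() {
  for (double x : {0.5, 3.0, 9.0}) {
    const double i1p = Eigen::numext::bessel_i1(x);
    const double i1n = Eigen::numext::bessel_i1(-x);
    assert(std::abs(i1p + i1n) <= 1e-12 * std::abs(i1p));              // odd symmetry
    assert(std::abs(i1p - std::exp(x) * Eigen::numext::bessel_i1e(x))  // scaling identity
           <= 1e-12 * i1p);
  }
  return 0;
}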
+ * + */ + + const float A[] = {1.90451637722020886025E-9f, 2.53479107902614945675E-7f, + 2.28621210311945178607E-5f, 1.26461541144692592338E-3f, + 3.59799365153615016266E-2f, 3.44289899924628486886E-1f, + -5.35327393233902768720E-1f}; + + const float B[] = {-1.69753450938905987466E-9f, 8.57403401741422608519E-9f, + -4.66048989768794782956E-8f, 2.76681363944501510342E-7f, + -1.83175552271911948767E-6f, 1.39498137188764993662E-5f, + -1.28495495816278026384E-4f, 1.56988388573005337491E-3f, + -3.14481013119645005427E-2f, 2.44030308206595545468E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pnegate( + plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pmul(pexp(x), x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect( + pcmp_le(x, pset1(0.0)), + MAXNUM, + pselect(pcmp_le(x, two), x_le_two, x_gt_two)); + } +}; + +template +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0e.c + * Modified Bessel function, third kind, order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, k0e(); + * + * y = k0e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.4e-15 1.4e-16 + * See k0(). + * + */ + + const double A[] = { + 1.37446543561352307156E-16, + 4.25981614279661018399E-14, + 1.03496952576338420167E-11, + 1.90451637722020886025E-9, + 2.53479107902614945675E-7, + 2.28621210311945178607E-5, + 1.26461541144692592338E-3, + 3.59799365153615016266E-2, + 3.44289899924628486886E-1, + -5.35327393233902768720E-1}; + const double B[] = { + 5.30043377268626276149E-18, -1.64758043015242134646E-17, + 5.21039150503902756861E-17, -1.67823109680541210385E-16, + 5.51205597852431940784E-16, -1.84859337734377901440E-15, + 6.34007647740507060557E-15, -2.22751332699166985548E-14, + 8.03289077536357521100E-14, -2.98009692317273043925E-13, + 1.14034058820847496303E-12, -4.51459788337394416547E-12, + 1.85594911495471785253E-11, -7.95748924447710747776E-11, + 3.57739728140030116597E-10, -1.69753450938905987466E-9, + 8.57403401741422608519E-9, -4.66048989768794782956E-8, + 2.76681363944501510342E-7, -1.83175552271911948767E-6, + 1.39498137188764993662E-5, -1.28495495816278026384E-4, + 1.56988388573005337491E-3, -3.14481013119645005427E-2, + 2.44030308206595545468E0 + }; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pmul( + pset1(-1.0), plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pmul(pexp(x), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct bessel_k0e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_k0e::run(x); + } +}; + +template +struct bessel_k0_retval { + typedef T type; +}; + +template ::type> +struct generic_k0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + 
EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0f.c + * Modified Bessel function, third kind, order zero + * + * + * + * SYNOPSIS: + * + * float x, y, k0f(); + * + * y = k0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns modified Bessel function of the third kind + * of order zero of the argument. + * + * The range is partitioned into the two intervals [0,8] and + * (8, infinity). Chebyshev polynomial expansions are employed + * in each interval. + * + * + * + * ACCURACY: + * + * Tested at 2000 random points between 0 and 8. Peak absolute + * error (relative when K0 > 1) was 1.46e-14; rms, 4.26e-15. + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 7.8e-7 8.5e-8 + * + * ERROR MESSAGES: + * + * message condition value returned + * K0 domain x <= 0 MAXNUM + * + */ + + const float A[] = {1.90451637722020886025E-9f, 2.53479107902614945675E-7f, + 2.28621210311945178607E-5f, 1.26461541144692592338E-3f, + 3.59799365153615016266E-2f, 3.44289899924628486886E-1f, + -5.35327393233902768720E-1f}; + + const float B[] = {-1.69753450938905987466E-9f, 8.57403401741422608519E-9f, + -4.66048989768794782956E-8f, 2.76681363944501510342E-7f, + -1.83175552271911948767E-6f, 1.39498137188764993662E-5f, + -1.28495495816278026384E-4f, 1.56988388573005337491E-3f, + -3.14481013119645005427E-2f, 2.44030308206595545468E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pnegate( + plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pmul( + pexp(pnegate(x)), + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B)), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* + * + * Modified Bessel function, third kind, order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, k0(); + * + * y = k0( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.4e-15 1.4e-16 + * See k0(). 
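For 0 < x <= 2 the code above combines a Chebyshev series in x*x - 2 with a -log(x/2) * i0(x) term, which is the logarithmic singularity of k0 at the origin; non-positive arguments return MAXNUM (+infinity), and the k0e variants earlier only add a factor exp(x). A scalar restatement of that branch, reusing the chbevl helper from the i0e sketch and using the C++17 i0 for brevity (an illustrative sketch, not the vectorized code):

#include <cmath>
#include <cstddef>
#include <limits>

// chbevl() is the Clenshaw helper from the i0e sketch earlier in this file;
// A holds the small-argument coefficients listed above.
double chbevl(double x, const double* coef, std::size_t n);  // see the i0e sketch

// Scalar analogue of the x <= 2 branch of generic_k0 above.
double k0_small(double x, const double* A, std::size_t nA) {
  if (x <= 0.0)                                      // domain error -> MAXNUM
    return std::numeric_limits<double>::infinity();
  // k0(x) = chbevl(x*x - 2, A) - log(x/2) * i0(x); std::cyl_bessel_i stands in for i0.
  return chbevl(x * x - 2.0, A, nA) -
         std::log(0.5 * x) * std::cyl_bessel_i(0.0, x);
}
// The exponentially scaled variants above differ only by a factor exp(x):
// k0e(x) = exp(x) * k0(x).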
+ * + */ + const double A[] = { + 1.37446543561352307156E-16, + 4.25981614279661018399E-14, + 1.03496952576338420167E-11, + 1.90451637722020886025E-9, + 2.53479107902614945675E-7, + 2.28621210311945178607E-5, + 1.26461541144692592338E-3, + 3.59799365153615016266E-2, + 3.44289899924628486886E-1, + -5.35327393233902768720E-1}; + const double B[] = { + 5.30043377268626276149E-18, -1.64758043015242134646E-17, + 5.21039150503902756861E-17, -1.67823109680541210385E-16, + 5.51205597852431940784E-16, -1.84859337734377901440E-15, + 6.34007647740507060557E-15, -2.22751332699166985548E-14, + 8.03289077536357521100E-14, -2.98009692317273043925E-13, + 1.14034058820847496303E-12, -4.51459788337394416547E-12, + 1.85594911495471785253E-11, -7.95748924447710747776E-11, + 3.57739728140030116597E-10, -1.69753450938905987466E-9, + 8.57403401741422608519E-9, -4.66048989768794782956E-8, + 2.76681363944501510342E-7, -1.83175552271911948767E-6, + 1.39498137188764993662E-5, -1.28495495816278026384E-4, + 1.56988388573005337491E-3, -3.14481013119645005427E-2, + 2.44030308206595545468E0 + }; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pnegate( + plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pmul( + pexp(-x), + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B)), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct bessel_k0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_k0::run(x); + } +}; + +template +struct bessel_k1e_retval { + typedef T type; +}; + +template ::type> +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1ef.c + * + * Modified Bessel function, third kind, order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, k1ef(); + * + * y = k1ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order one of the argument: + * + * k1e(x) = exp(x) * k1(x). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.9e-7 6.7e-8 + * See k1(). 
+ * + */ + + const float A[] = {-2.21338763073472585583E-8f, -2.43340614156596823496E-6f, + -1.73028895751305206302E-4f, -6.97572385963986435018E-3f, + -1.22611180822657148235E-1f, -3.53155960776544875667E-1f, + 1.52530022733894777053E0f}; + const float B[] = {2.01504975519703286596E-9f, -1.03457624656780970260E-8f, + 5.74108412545004946722E-8f, -3.50196060308781257119E-7f, + 2.40648494783721712015E-6f, -1.93619797416608296024E-5f, + 1.95215518471351631108E-4f, -2.85781685962277938680E-3f, + 1.03923736576817238437E-1f, 2.72062619048444266945E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pmul(x_le_two, pexp(x)); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1e.c + * + * Modified Bessel function, third kind, order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, k1e(); + * + * y = k1e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order one of the argument: + * + * k1e(x) = exp(x) * k1(x). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 7.8e-16 1.2e-16 + * See k1(). + * + */ + const double A[] = {-7.02386347938628759343E-18, -2.42744985051936593393E-15, + -6.66690169419932900609E-13, -1.41148839263352776110E-10, + -2.21338763073472585583E-8, -2.43340614156596823496E-6, + -1.73028895751305206302E-4, -6.97572385963986435018E-3, + -1.22611180822657148235E-1, -3.53155960776544875667E-1, + 1.52530022733894777053E0}; + const double B[] = {-5.75674448366501715755E-18, 1.79405087314755922667E-17, + -5.68946255844285935196E-17, 1.83809354436663880070E-16, + -6.05704724837331885336E-16, 2.03870316562433424052E-15, + -7.01983709041831346144E-15, 2.47715442448130437068E-14, + -8.97670518232499435011E-14, 3.34841966607842919884E-13, + -1.28917396095102890680E-12, 5.13963967348173025100E-12, + -2.12996783842756842877E-11, 9.21831518760500529508E-11, + -4.19035475934189648750E-10, 2.01504975519703286596E-9, + -1.03457624656780970260E-8, 5.74108412545004946722E-8, + -3.50196060308781257119E-7, 2.40648494783721712015E-6, + -1.93619797416608296024E-5, 1.95215518471351631108E-4, + -2.85781685962277938680E-3, 1.03923736576817238437E-1, + 2.72062619048444266945E0}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pmul(x_le_two, pexp(x)); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct bessel_k1e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_k1e::run(x); + } +}; + +template +struct bessel_k1_retval { + typedef T type; +}; + +template ::type> +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const 
T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1f.c + * Modified Bessel function, third kind, order one + * + * + * + * SYNOPSIS: + * + * float x, y, k1f(); + * + * y = k1f( x ); + * + * + * + * DESCRIPTION: + * + * Computes the modified Bessel function of the third kind + * of order one of the argument. + * + * The range is partitioned into the two intervals [0,2] and + * (2, infinity). Chebyshev polynomial expansions are employed + * in each interval. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.6e-7 7.6e-8 + * + * ERROR MESSAGES: + * + * message condition value returned + * k1 domain x <= 0 MAXNUM + * + */ + + const float A[] = {-2.21338763073472585583E-8f, -2.43340614156596823496E-6f, + -1.73028895751305206302E-4f, -6.97572385963986435018E-3f, + -1.22611180822657148235E-1f, -3.53155960776544875667E-1f, + 1.52530022733894777053E0f}; + const float B[] = {2.01504975519703286596E-9f, -1.03457624656780970260E-8f, + 5.74108412545004946722E-8f, -3.50196060308781257119E-7f, + 2.40648494783721712015E-6f, -1.93619797416608296024E-5f, + 1.95215518471351631108E-4f, -2.85781685962277938680E-3f, + 1.03923736576817238437E-1f, 2.72062619048444266945E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pexp(pnegate(x)), + pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x))); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1.c + * Modified Bessel function, third kind, order one + * + * + * + * SYNOPSIS: + * + * float x, y, k1f(); + * + * y = k1f( x ); + * + * + * + * DESCRIPTION: + * + * Computes the modified Bessel function of the third kind + * of order one of the argument. + * + * The range is partitioned into the two intervals [0,2] and + * (2, infinity). Chebyshev polynomial expansions are employed + * in each interval. 
+ * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.6e-7 7.6e-8 + * + * ERROR MESSAGES: + * + * message condition value returned + * k1 domain x <= 0 MAXNUM + * + */ + const double A[] = {-7.02386347938628759343E-18, -2.42744985051936593393E-15, + -6.66690169419932900609E-13, -1.41148839263352776110E-10, + -2.21338763073472585583E-8, -2.43340614156596823496E-6, + -1.73028895751305206302E-4, -6.97572385963986435018E-3, + -1.22611180822657148235E-1, -3.53155960776544875667E-1, + 1.52530022733894777053E0}; + const double B[] = {-5.75674448366501715755E-18, 1.79405087314755922667E-17, + -5.68946255844285935196E-17, 1.83809354436663880070E-16, + -6.05704724837331885336E-16, 2.03870316562433424052E-15, + -7.01983709041831346144E-15, 2.47715442448130437068E-14, + -8.97670518232499435011E-14, 3.34841966607842919884E-13, + -1.28917396095102890680E-12, 5.13963967348173025100E-12, + -2.12996783842756842877E-11, 9.21831518760500529508E-11, + -4.19035475934189648750E-10, 2.01504975519703286596E-9, + -1.03457624656780970260E-8, 5.74108412545004946722E-8, + -3.50196060308781257119E-7, 2.40648494783721712015E-6, + -1.93619797416608296024E-5, 1.95215518471351631108E-4, + -2.85781685962277938680E-3, 1.03923736576817238437E-1, + 2.72062619048444266945E0}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pexp(-x), + pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x))); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct bessel_k1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_k1::run(x); + } +}; + +template +struct bessel_j0_retval { + typedef T type; +}; + +template ::type> +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0f.c + * Bessel function of order zero + * + * + * + * SYNOPSIS: + * + * float x, y, j0f(); + * + * y = j0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order zero of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval the following polynomial + * approximation is used: + * + * + * 2 2 2 + * (w - r ) (w - r ) (w - r ) P(w) + * 1 2 3 + * + * 2 + * where w = x and the three r's are zeros of the function. + * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x R(1/x^2) - pi/4. The function is + * + * j0(x) = Modulus(x) cos( Phase(x) ). 
+ * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 1.3e-7 3.6e-8 + * IEEE 2, 32 100000 1.9e-7 5.4e-8 + * + */ + + const float JP[] = {-6.068350350393235E-008f, 6.388945720783375E-006f, + -3.969646342510940E-004f, 1.332913422519003E-002f, + -1.729150680240724E-001f}; + const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f, + -2.145007480346739E-001f, 1.197549369473540E-001f, + -3.560281861530129E-003f, -4.969382655296620E-002f, + -3.355424622293709E-006f, 7.978845717621440E-001f}; + const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f, + 1.756221482109099E+001f, -4.974978466280903E+000f, + 1.001973420681837E+000f, -1.939906941791308E-001f, + 6.490598792654666E-002f, -1.249992184872738E-001f}; + const T DR1 = pset1(5.78318596294678452118f); + const T NEG_PIO4F = pset1(-0.7853981633974483096f); /* -pi / 4 */ + T y = pabs(x); + T z = pmul(y, y); + T y_le_two = pselect( + pcmp_lt(y, pset1(1.0e-3f)), + pmadd(z, pset1(-0.25f), pset1(1.0f)), + pmul(psub(z, DR1), internal::ppolevl::run(z, JP))); + T q = pdiv(pset1(1.0f), y); + T w = prsqrt(y); + T p = pmul(w, internal::ppolevl::run(q, MO)); + w = pmul(q, q); + T yn = pmadd(q, internal::ppolevl::run(w, PH), NEG_PIO4F); + T y_gt_two = pmul(p, pcos(padd(yn, y))); + return pselect(pcmp_le(y, pset1(2.0)), y_le_two, y_gt_two); + } +}; + +template +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0.c + * Bessel function of order zero + * + * + * + * SYNOPSIS: + * + * double x, y, j0(); + * + * y = j0( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order zero of the argument. + * + * The domain is divided into the intervals [0, 5] and + * (5, infinity). In the first interval the following rational + * approximation is used: + * + * + * 2 2 + * (w - r ) (w - r ) P (w) / Q (w) + * 1 2 3 8 + * + * 2 + * where w = x and the two r's are zeros of the function. + * + * In the second interval, the Hankel asymptotic expansion + * is employed with two rational functions of degree 6/6 + * and 7/7. 
+ * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 4.4e-17 6.3e-18 + * IEEE 0, 30 60000 4.2e-16 1.1e-16 + * + */ + const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2, + 1.23953371646414299388E0, 5.44725003058768775090E0, + 8.74716500199817011941E0, 5.30324038235394892183E0, + 9.99999999999999997821E-1}; + const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2, + 1.25352743901058953537E0, 5.47097740330417105182E0, + 8.76190883237069594232E0, 5.30605288235394617618E0, + 1.00000000000000000218E0}; + const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0, + -1.95539544257735972385E1, -9.32060152123768231369E1, + -1.77681167980488050595E2, -1.47077505154951170175E2, + -5.14105326766599330220E1, -6.05014350600728481186E0}; + const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1, + 8.56430025976980587198E2, 3.88240183605401609683E3, + 7.24046774195652478189E3, 5.93072701187316984827E3, + 2.06209331660327847417E3, 2.42005740240291393179E2}; + const double RP[] = {-4.79443220978201773821E9, 1.95617491946556577543E12, + -2.49248344360967716204E14, 9.70862251047306323952E15}; + const double RQ[] = {1.00000000000000000000E0, 4.99563147152651017219E2, + 1.73785401676374683123E5, 4.84409658339962045305E7, + 1.11855537045356834862E10, 2.11277520115489217587E12, + 3.10518229857422583814E14, 3.18121955943204943306E16, + 1.71086294081043136091E18}; + const T DR1 = pset1(5.78318596294678452118E0); + const T DR2 = pset1(3.04712623436620863991E1); + const T SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + const T NEG_PIO4 = pset1(-0.7853981633974483096); /* pi / 4 */ + + T y = pabs(x); + T z = pmul(y, y); + T y_le_five = pselect( + pcmp_lt(y, pset1(1.0e-5)), + pmadd(z, pset1(-0.25), pset1(1.0)), + pmul(pmul(psub(z, DR1), psub(z, DR2)), + pdiv(internal::ppolevl::run(z, RP), + internal::ppolevl::run(z, RQ)))); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T yn = padd(y, NEG_PIO4); + T w = pdiv(pset1(-5.0), y); + p = pmadd(p, pcos(yn), pmul(w, pmul(q, psin(yn)))); + T y_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(y))); + return pselect(pcmp_le(y, pset1(5.0)), y_le_five, y_gt_five); + } +}; + +template +struct bessel_j0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_j0::run(x); + } +}; + +template +struct bessel_y0_retval { + typedef T type; +}; + +template ::type> +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0f.c + * Bessel function of the second kind, order zero + * + * + * + * SYNOPSIS: + * + * float x, y, y0f(); + * + * y = y0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind, of order + * zero, of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval a rational approximation + * R(x) is employed to compute + * + * 2 2 2 + * y0(x) = (w - r ) (w - r ) (w - r ) R(x) + 2/pi ln(x) j0(x). + * 1 2 3 + * + * Thus a call to j0() is required. The three zeros are removed + * from R(x) to improve its numerical stability. 
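The constants SQ2OPI and NEG_PIO4 above encode the leading Hankel term j0(x) ~ sqrt(2/(pi*x)) * cos(x - pi/4) used for |x| > 5, and the tiny-argument branch is the Taylor start 1 - x*x/4. A short numerical comparison, assuming a C++17 standard library with std::cyl_bessel_j:

#include <cmath>
#include <cstdio>

int main() {
  const double pi = 3.14159265358979323846;
  for (double x : {6.0, 12.0, 25.0}) {
    // Leading Hankel term used above for |x| > 5.
    const double lead = std::sqrt(2.0 / (pi * x)) * std::cos(x - pi / 4.0);
    std::printf("x=%5.1f  sqrt(2/(pi*x))*cos(x-pi/4)=%.8f  std::cyl_bessel_j(0,x)=%.8f\n",
                x, lead, std::cyl_bessel_j(0.0, x));
  }
  // Tiny-argument branch: j0(x) = 1 - x^2/4 + O(x^4), i.e. pmadd(z, -0.25, 1.0) above.
  const double x = 1e-6;
  std::printf("x=%g  1-x*x/4=%.17g  std::cyl_bessel_j(0,x)=%.17g\n",
              x, 1.0 - 0.25 * x * x, std::cyl_bessel_j(0.0, x));
}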
+ * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x S(1/x^2) - pi/4. Then the function is + * + * y0(x) = Modulus(x) sin( Phase(x) ). + * + * + * + * + * ACCURACY: + * + * Absolute error, when y0(x) < 1; else relative error: + * + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 2.4e-7 3.4e-8 + * IEEE 2, 32 100000 1.8e-7 5.3e-8 + * + */ + + const float YP[] = {9.454583683980369E-008f, -9.413212653797057E-006f, + 5.344486707214273E-004f, -1.584289289821316E-002f, + 1.707584643733568E-001f}; + const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f, + -2.145007480346739E-001f, 1.197549369473540E-001f, + -3.560281861530129E-003f, -4.969382655296620E-002f, + -3.355424622293709E-006f, 7.978845717621440E-001f}; + const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f, + 1.756221482109099E+001f, -4.974978466280903E+000f, + 1.001973420681837E+000f, -1.939906941791308E-001f, + 6.490598792654666E-002f, -1.249992184872738E-001f}; + const T YZ1 = pset1(0.43221455686510834878f); + const T TWOOPI = pset1(0.636619772367581343075535f); /* 2 / pi */ + const T NEG_PIO4F = pset1(-0.7853981633974483096f); /* -pi / 4 */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + T z = pmul(x, x); + T x_le_two = pmul(TWOOPI, pmul(plog(x), generic_j0::run(x))); + x_le_two = pmadd( + psub(z, YZ1), internal::ppolevl::run(z, YP), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_two); + T q = pdiv(pset1(1.0), x); + T w = prsqrt(x); + T p = pmul(w, internal::ppolevl::run(q, MO)); + T u = pmul(q, q); + T xn = pmadd(q, internal::ppolevl::run(u, PH), NEG_PIO4F); + T x_gt_two = pmul(p, psin(padd(xn, x))); + return pselect(pcmp_le(x, pset1(2.0)), x_le_two, x_gt_two); + } +}; + +template +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0.c + * Bessel function of the second kind, order zero + * + * + * + * SYNOPSIS: + * + * double x, y, y0(); + * + * y = y0( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind, of order + * zero, of the argument. + * + * The domain is divided into the intervals [0, 5] and + * (5, infinity). In the first interval a rational approximation + * R(x) is employed to compute + * y0(x) = R(x) + 2 * log(x) * j0(x) / PI. + * Thus a call to j0() is required. + * + * In the second interval, the Hankel asymptotic expansion + * is employed with two rational functions of degree 6/6 + * and 7/7. 
+ * + * + * + * ACCURACY: + * + * Absolute error, when y0(x) < 1; else relative error: + * + * arithmetic domain # trials peak rms + * DEC 0, 30 9400 7.0e-17 7.9e-18 + * IEEE 0, 30 30000 1.3e-15 1.6e-16 + * + */ + const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2, + 1.23953371646414299388E0, 5.44725003058768775090E0, + 8.74716500199817011941E0, 5.30324038235394892183E0, + 9.99999999999999997821E-1}; + const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2, + 1.25352743901058953537E0, 5.47097740330417105182E0, + 8.76190883237069594232E0, 5.30605288235394617618E0, + 1.00000000000000000218E0}; + const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0, + -1.95539544257735972385E1, -9.32060152123768231369E1, + -1.77681167980488050595E2, -1.47077505154951170175E2, + -5.14105326766599330220E1, -6.05014350600728481186E0}; + const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1, + 8.56430025976980587198E2, 3.88240183605401609683E3, + 7.24046774195652478189E3, 5.93072701187316984827E3, + 2.06209331660327847417E3, 2.42005740240291393179E2}; + const double YP[] = {1.55924367855235737965E4, -1.46639295903971606143E7, + 5.43526477051876500413E9, -9.82136065717911466409E11, + 8.75906394395366999549E13, -3.46628303384729719441E15, + 4.42733268572569800351E16, -1.84950800436986690637E16}; + const double YQ[] = {1.00000000000000000000E0, 1.04128353664259848412E3, + 6.26107330137134956842E5, 2.68919633393814121987E8, + 8.64002487103935000337E10, 2.02979612750105546709E13, + 3.17157752842975028269E15, 2.50596256172653059228E17}; + const T SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + const T TWOOPI = pset1(0.636619772367581343075535); /* 2 / pi */ + const T NEG_PIO4 = pset1(-0.7853981633974483096); /* -pi / 4 */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_five = pdiv(internal::ppolevl::run(z, YP), + internal::ppolevl::run(z, YQ)); + x_le_five = pmadd( + pmul(TWOOPI, plog(x)), generic_j0::run(x), x_le_five); + x_le_five = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_five); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T xn = padd(x, NEG_PIO4); + T w = pdiv(pset1(5.0), x); + p = pmadd(p, psin(xn), pmul(w, pmul(q, pcos(xn)))); + T x_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(x))); + return pselect(pcmp_le(x, pset1(5.0)), x_le_five, x_gt_five); + } +}; + +template +struct bessel_y0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_y0::run(x); + } +}; + +template +struct bessel_j1_retval { + typedef T type; +}; + +template ::type> +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1f.c + * Bessel function of order one + * + * + * + * SYNOPSIS: + * + * float x, y, j1f(); + * + * y = j1f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order one of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval a polynomial approximation + * 2 + * (w - r ) x P(w) + * 1 + * 2 + * is used, where w = x and r is the first zero of the function. 
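The (2/pi) * log(x) * j0(x) term above carries y0's logarithmic singularity, and non-positive arguments map to NEG_MAXNUM, i.e. -infinity. A small check against std::cyl_neumann, the C++17 name for the Bessel function of the second kind (same assumptions as the earlier checks):

#include <cmath>
#include <cstdio>
#include <limits>

#include <unsupported/Eigen/SpecialFunctions>

int main() {
  Eigen::ArrayXd x(4);
  x << 0.5, 2.0, 7.0, -1.0;
  Eigen::ArrayXd y0 = Eigen::bessel_y0(x);
  for (Eigen::Index i = 0; i < x.size(); ++i) {
    // std::cyl_neumann is only defined for positive arguments; the Eigen code
    // returns -infinity for x <= 0 instead.
    const double ref = x[i] > 0.0 ? std::cyl_neumann(0.0, x[i])
                                  : -std::numeric_limits<double>::infinity();
    std::printf("x=%5.2f  Eigen=%.10g  reference=%.10g\n", x[i], y0[i], ref);
  }
}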
+ * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x R(1/x^2) - 3pi/4. The function is + * + * j0(x) = Modulus(x) cos( Phase(x) ). + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 1.2e-7 2.5e-8 + * IEEE 2, 32 100000 2.0e-7 5.3e-8 + * + * + */ + + const float JP[] = {-4.878788132172128E-009f, 6.009061827883699E-007f, + -4.541343896997497E-005f, 1.937383947804541E-003f, + -3.405537384615824E-002f}; + const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f, + 3.138238455499697E-001f, -2.102302420403875E-001f, + 5.435364690523026E-003f, 1.493389585089498E-001f, + 4.976029650847191E-006f, 7.978845453073848E-001f}; + const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f, + -2.485774108720340E+001f, 7.222973196770240E+000f, + -1.544842782180211E+000f, 3.503787691653334E-001f, + -1.637986776941202E-001f, 3.749989509080821E-001f}; + const T Z1 = pset1(1.46819706421238932572E1f); + const T NEG_THPIO4F = pset1(-2.35619449019234492885f); /* -3*pi/4 */ + + T y = pabs(x); + T z = pmul(y, y); + T y_le_two = pmul( + psub(z, Z1), + pmul(x, internal::ppolevl::run(z, JP))); + T q = pdiv(pset1(1.0f), y); + T w = prsqrt(y); + T p = pmul(w, internal::ppolevl::run(q, MO1)); + w = pmul(q, q); + T yn = pmadd(q, internal::ppolevl::run(w, PH1), NEG_THPIO4F); + T y_gt_two = pmul(p, pcos(padd(yn, y))); + // j1 is an odd function. This implementation differs from cephes to + // take this fact in to account. Cephes returns -j1(x) for y > 2 range. + y_gt_two = pselect( + pcmp_lt(x, pset1(0.0f)), pnegate(y_gt_two), y_gt_two); + return pselect(pcmp_le(y, pset1(2.0f)), y_le_two, y_gt_two); + } +}; + +template +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1.c + * Bessel function of order one + * + * + * + * SYNOPSIS: + * + * double x, y, j1(); + * + * y = j1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order one of the argument. + * + * The domain is divided into the intervals [0, 8] and + * (8, infinity). In the first interval a 24 term Chebyshev + * expansion is used. In the second, the asymptotic + * trigonometric representation is employed using two + * rational functions of degree 5/5. 
+ * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 4.0e-17 1.1e-17 + * IEEE 0, 30 30000 2.6e-16 1.1e-16 + * + */ + const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2, + 1.12719608129684925192E0, 5.11207951146807644818E0, + 8.42404590141772420927E0, 5.21451598682361504063E0, + 1.00000000000000000254E0}; + const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2, + 1.10514232634061696926E0, 5.07386386128601488557E0, + 8.39985554327604159757E0, 5.20982848682361821619E0, + 9.99999999999999997461E-1}; + const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0, + 7.58238284132545283818E1, 3.66779609360150777800E2, + 7.10856304998926107277E2, 5.97489612400613639965E2, + 2.11688757100572135698E2, 2.52070205858023719784E1}; + const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1, + 1.05644886038262816351E3, 4.98641058337653607651E3, + 9.56231892404756170795E3, 7.99704160447350683650E3, + 2.82619278517639096600E3, 3.36093607810698293419E2}; + const double RP[] = {-8.99971225705559398224E8, 4.52228297998194034323E11, + -7.27494245221818276015E13, 3.68295732863852883286E15}; + const double RQ[] = {1.00000000000000000000E0, 6.20836478118054335476E2, + 2.56987256757748830383E5, 8.35146791431949253037E7, + 2.21511595479792499675E10, 4.74914122079991414898E12, + 7.84369607876235854894E14, 8.95222336184627338078E16, + 5.32278620332680085395E18}; + const T Z1 = pset1(1.46819706421238932572E1); + const T Z2 = pset1(4.92184563216946036703E1); + const T NEG_THPIO4 = pset1(-2.35619449019234492885); /* -3*pi/4 */ + const T SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + T y = pabs(x); + T z = pmul(y, y); + T y_le_five = pdiv(internal::ppolevl::run(z, RP), + internal::ppolevl::run(z, RQ)); + y_le_five = pmul(pmul(pmul(y_le_five, x), psub(z, Z1)), psub(z, Z2)); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T yn = padd(y, NEG_THPIO4); + T w = pdiv(pset1(-5.0), y); + p = pmadd(p, pcos(yn), pmul(w, pmul(q, psin(yn)))); + T y_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(y))); + // j1 is an odd function. This implementation differs from cephes to + // take this fact in to account. Cephes returns -j1(x) for y > 5 range. + y_gt_five = pselect( + pcmp_lt(x, pset1(0.0)), pnegate(y_gt_five), y_gt_five); + return pselect(pcmp_le(y, pset1(5.0)), y_le_five, y_gt_five); + } +}; + +template +struct bessel_j1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_j1::run(x); + } +}; + +template +struct bessel_y1_retval { + typedef T type; +}; + +template ::type> +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1f.c + * Bessel function of second kind of order one + * + * + * + * SYNOPSIS: + * + * double x, y, y1(); + * + * y = y1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind of order one + * of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). 
In the first interval a rational approximation + * R(x) is employed to compute + * + * 2 + * y0(x) = (w - r ) x R(x^2) + 2/pi (ln(x) j1(x) - 1/x) . + * 1 + * + * Thus a call to j1() is required. + * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x S(1/x^2) - 3pi/4. Then the function is + * + * y0(x) = Modulus(x) sin( Phase(x) ). + * + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 2.2e-7 4.6e-8 + * IEEE 2, 32 100000 1.9e-7 5.3e-8 + * + * (error criterion relative when |y1| > 1). + * + */ + + const float YP[] = {8.061978323326852E-009f, -9.496460629917016E-007f, + 6.719543806674249E-005f, -2.641785726447862E-003f, + 4.202369946500099E-002f}; + const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f, + 3.138238455499697E-001f, -2.102302420403875E-001f, + 5.435364690523026E-003f, 1.493389585089498E-001f, + 4.976029650847191E-006f, 7.978845453073848E-001f}; + const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f, + -2.485774108720340E+001f, 7.222973196770240E+000f, + -1.544842782180211E+000f, 3.503787691653334E-001f, + -1.637986776941202E-001f, 3.749989509080821E-001f}; + const T YO1 = pset1(4.66539330185668857532f); + const T NEG_THPIO4F = pset1(-2.35619449019234492885f); /* -3*pi/4 */ + const T TWOOPI = pset1(0.636619772367581343075535f); /* 2/pi */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_two = pmul(psub(z, YO1), internal::ppolevl::run(z, YP)); + x_le_two = pmadd( + x_le_two, x, + pmul(TWOOPI, pmadd( + generic_j1::run(x), plog(x), + pdiv(pset1(-1.0f), x)))); + x_le_two = pselect(pcmp_lt(x, pset1(0.0f)), NEG_MAXNUM, x_le_two); + + T q = pdiv(pset1(1.0), x); + T w = prsqrt(x); + T p = pmul(w, internal::ppolevl::run(q, MO1)); + w = pmul(q, q); + T xn = pmadd(q, internal::ppolevl::run(w, PH1), NEG_THPIO4F); + T x_gt_two = pmul(p, psin(padd(xn, x))); + return pselect(pcmp_le(x, pset1(2.0)), x_le_two, x_gt_two); + } +}; + +template +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1.c + * Bessel function of second kind of order one + * + * + * + * SYNOPSIS: + * + * double x, y, y1(); + * + * y = y1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind of order one + * of the argument. + * + * The domain is divided into the intervals [0, 8] and + * (8, infinity). In the first interval a 25 term Chebyshev + * expansion is used, and a call to j1() is required. + * In the second, the asymptotic trigonometric representation + * is employed using two rational functions of degree 5/5. + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 8.6e-17 1.3e-17 + * IEEE 0, 30 30000 1.0e-15 1.3e-16 + * + * (error criterion relative when |y1| > 1). 
+ * + */ + const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2, + 1.12719608129684925192E0, 5.11207951146807644818E0, + 8.42404590141772420927E0, 5.21451598682361504063E0, + 1.00000000000000000254E0}; + const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2, + 1.10514232634061696926E0, 5.07386386128601488557E0, + 8.39985554327604159757E0, 5.20982848682361821619E0, + 9.99999999999999997461E-1}; + const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0, + 7.58238284132545283818E1, 3.66779609360150777800E2, + 7.10856304998926107277E2, 5.97489612400613639965E2, + 2.11688757100572135698E2, 2.52070205858023719784E1}; + const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1, + 1.05644886038262816351E3, 4.98641058337653607651E3, + 9.56231892404756170795E3, 7.99704160447350683650E3, + 2.82619278517639096600E3, 3.36093607810698293419E2}; + const double YP[] = {1.26320474790178026440E9, -6.47355876379160291031E11, + 1.14509511541823727583E14, -8.12770255501325109621E15, + 2.02439475713594898196E17, -7.78877196265950026825E17}; + const double YQ[] = {1.00000000000000000000E0, 5.94301592346128195359E2, + 2.35564092943068577943E5, 7.34811944459721705660E7, + 1.87601316108706159478E10, 3.88231277496238566008E12, + 6.20557727146953693363E14, 6.87141087355300489866E16, + 3.97270608116560655612E18}; + const T SQ2OPI = pset1(.79788456080286535588); + const T NEG_THPIO4 = pset1(-2.35619449019234492885); /* -3*pi/4 */ + const T TWOOPI = pset1(0.636619772367581343075535); /* 2/pi */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_five = pdiv(internal::ppolevl::run(z, YP), + internal::ppolevl::run(z, YQ)); + x_le_five = pmadd( + x_le_five, x, pmul( + TWOOPI, pmadd(generic_j1::run(x), plog(x), + pdiv(pset1(-1.0), x)))); + + x_le_five = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_five); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T xn = padd(x, NEG_THPIO4); + T w = pdiv(pset1(5.0), x); + p = pmadd(p, psin(xn), pmul(w, pmul(q, pcos(xn)))); + T x_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(x))); + return pselect(pcmp_le(x, pset1(5.0)), x_le_five, x_gt_five); + } +}; + +template +struct bessel_y1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T x) { + return generic_y1::run(x); + } +}; + +} // end namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i0, Scalar) + bessel_i0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_i0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i0e, Scalar) + bessel_i0e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_i0e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i1, Scalar) + bessel_i1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_i1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i1e, Scalar) + bessel_i1e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_i1e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k0, Scalar) + bessel_k0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_k0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k0e, Scalar) + bessel_k0e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_k0e, 
Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k1, Scalar) + bessel_k1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_k1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k1e, Scalar) + bessel_k1e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_k1e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_j0, Scalar) + bessel_j0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_j0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_y0, Scalar) + bessel_y0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_y0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_j1, Scalar) + bessel_j1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_j1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_y1, Scalar) + bessel_y1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(bessel_y1, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_BESSEL_FUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h new file mode 100644 index 0000000..943d10f --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h @@ -0,0 +1,118 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
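
The numext wrappers defined just above are the scalar entry points for the Bessel family; the packet helpers in the next file simply forward to them. A minimal usage sketch follows — it is not part of the patch, and the include path is an assumption (the module is normally consumed through the unsupported umbrella header, while this tree vendors it under external/unsupported/, so the actual path may differ):

    // Illustrative only; header path is an assumption, see note above.
    #include <unsupported/Eigen/SpecialFunctions>
    #include <cstdio>

    int main() {
      const double x = 2.4048255576957728;   // first zero of J0
      std::printf("j0(%f) = %g (should be ~0)\n", x, Eigen::numext::bessel_j0(x));
      std::printf("j0(0)  = %g (should be 1)\n", Eigen::numext::bessel_j0(0.0));
      std::printf("j1(0)  = %g (should be 0)\n", Eigen::numext::bessel_j1(0.0));
      std::printf("y0(1)  = %g (~0.0883)\n", Eigen::numext::bessel_y0(1.0));
      return 0;
    }
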
+ +#ifndef EIGEN_BESSELFUNCTIONS_PACKETMATH_H +#define EIGEN_BESSELFUNCTIONS_PACKETMATH_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero i0(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_i0(const Packet& x) { + return numext::bessel_i0(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero i0e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_i0e(const Packet& x) { + return numext::bessel_i0e(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one i1(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_i1(const Packet& x) { + return numext::bessel_i1(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one i1e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_i1e(const Packet& x) { + return numext::bessel_i1e(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero j0(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_j0(const Packet& x) { + return numext::bessel_j0(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero j1(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_j1(const Packet& x) { + return numext::bessel_j1(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one y0(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_y0(const Packet& x) { + return numext::bessel_y0(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one y1(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_y1(const Packet& x) { + return numext::bessel_y1(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero k0(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_k0(const Packet& x) { + return numext::bessel_k0(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero k0e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_k0e(const Packet& x) { + return numext::bessel_k0e(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one k1e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_k1(const Packet& x) { + return numext::bessel_k1(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one k1e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pbessel_k1e(const Packet& x) { + return numext::bessel_k1e(x); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_PACKETMATH_H + diff --git 
a/external/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h b/external/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h new file mode 100644 index 0000000..d7b231a --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h @@ -0,0 +1,67 @@ +#ifndef HIP_VECTOR_COMPATIBILITY_H +#define HIP_VECTOR_COMPATIBILITY_H + +namespace hip_impl { + template struct Scalar_accessor; +} // end namespace hip_impl + +namespace Eigen { +namespace internal { + +#define HIP_SCALAR_ACCESSOR_BUILDER(NAME) \ +template \ +struct NAME > : NAME {}; + +#define HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(NAME) \ +template \ +struct NAME##_impl > : NAME##_impl {}; \ +template \ +struct NAME##_retval > : NAME##_retval {}; + +#define HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(NAME) \ +template \ +struct NAME , mode> : NAME {}; + +#if EIGEN_HAS_C99_MATH +HIP_SCALAR_ACCESSOR_BUILDER(betainc_helper) +HIP_SCALAR_ACCESSOR_BUILDER(incbeta_cfe) + +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(erf) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(erfc) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igammac) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(lgamma) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(ndtri) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(polygamma) + +HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igamma_generic_impl) +#endif + +HIP_SCALAR_ACCESSOR_BUILDER(digamma_impl_maybe_poly) +HIP_SCALAR_ACCESSOR_BUILDER(zeta_impl_series) + +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i0) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i0e) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i1) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i1e) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_j0) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_j1) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k0) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k0e) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k1) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k1e) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_y0) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_y1) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(betainc) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(digamma) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(gamma_sample_der_alpha) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igamma_der_a) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igamma) +HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(zeta) + +HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igamma_series_impl) +HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igammac_cf_impl) + +} // end namespace internal +} // end namespace Eigen + +#endif // HIP_VECTOR_COMPATIBILITY_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h new file mode 100644 index 0000000..691ff4d --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H +#define EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise incomplete gamma function. + * + * \note This function supports only float and double scalar types in c++11 mode. 
To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::igammac(), Eigen::lgamma() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) +{ + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); +} + +/** \cpp11 \returns an expression of the coefficient-wise igamma_der_a(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise derivative of the incomplete + * gamma function with respect to the parameter a. + * + * \note This function supports only float and double scalar types in c++11 + * mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations + * of igamma_der_a(T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::igamma(), Eigen::lgamma() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +igamma_der_a(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived()); +} + +/** \cpp11 \returns an expression of the coefficient-wise gamma_sample_der_alpha(\a alpha, \a sample) to the given arrays. + * + * This function computes the coefficient-wise derivative of the sample + * of a Gamma(alpha, 1) random variable with respect to the parameter alpha. + * + * \note This function supports only float and double scalar types in c++11 + * mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations + * of gamma_sample_der_alpha(T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::igamma(), Eigen::lgamma() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived> +gamma_sample_der_alpha(const Eigen::ArrayBase& alpha, const Eigen::ArrayBase& sample) { + return Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived>( + alpha.derived(), + sample.derived()); +} + +/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise complementary incomplete gamma function. + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::igamma(), Eigen::lgamma() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +igammac(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) +{ + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); +} + +/** \cpp11 \returns an expression of the coefficient-wise polygamma(\a n, \a x) to the given arrays. + * + * It returns the \a n -th derivative of the digamma(psi) evaluated at \c x. + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of polygamma(T,T) for any scalar + * type T to be supported. 
+ * + * \sa Eigen::digamma() + */ +// * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x) +// * \sa ArrayBase::polygamma() +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const DerivedN, const DerivedX> +polygamma(const Eigen::ArrayBase& n, const Eigen::ArrayBase& x) +{ + return Eigen::CwiseBinaryOp, const DerivedN, const DerivedX>( + n.derived(), + x.derived() + ); +} + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given arrays. + * + * This function computes the regularized incomplete beta function (integral). + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of betainc(T,T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::betainc(), Eigen::lgamma() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived> +betainc(const Eigen::ArrayBase& a, const Eigen::ArrayBase& b, const Eigen::ArrayBase& x) +{ + return Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived>( + a.derived(), + b.derived(), + x.derived() + ); +} + + +/** \returns an expression of the coefficient-wise zeta(\a x, \a q) to the given arrays. + * + * It returns the Riemann zeta function of two arguments \a x and \a q: + * + * \param x is the exponent, it must be > 1 + * \param q is the shift, it must be > 0 + * + * \note This function supports only float and double scalar types. To support other scalar types, the user has + * to provide implementations of zeta(T,T) for any scalar type T to be supported. + * + * \sa ArrayBase::zeta() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const DerivedX, const DerivedQ> +zeta(const Eigen::ArrayBase& x, const Eigen::ArrayBase& q) +{ + return Eigen::CwiseBinaryOp, const DerivedX, const DerivedQ>( + x.derived(), + q.derived() + ); +} + + +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h new file mode 100644 index 0000000..2d94231 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h @@ -0,0 +1,58 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
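
The array-level entry points above (igamma, igammac, polygamma, betainc, zeta) wrap the binary/ternary functors defined later in this patch into coefficient-wise expressions. A minimal sketch of how they are typically called, under the same include-path assumption as before and not part of the patch:

    // Illustrative only; header path is an assumption.
    #include <Eigen/Core>
    #include <unsupported/Eigen/SpecialFunctions>
    #include <iostream>

    int main() {
      Eigen::ArrayXd a(3), x(3);
      a << 0.5, 2.0, 5.0;
      x << 0.1, 2.0, 7.5;

      // Regularized lower/upper incomplete gamma; coefficient-wise they sum to 1.
      Eigen::ArrayXd lower = Eigen::igamma(a, x);
      Eigen::ArrayXd upper = Eigen::igammac(a, x);
      std::cout << (lower + upper).transpose() << "\n";   // ~ 1 1 1

      // zeta(x, q) with q = 1 reduces to the Riemann zeta function.
      Eigen::ArrayXd s(2), q(2);
      s << 2.0, 4.0;
      q << 1.0, 1.0;
      std::cout << Eigen::zeta(s, q).transpose() << "\n"; // ~ pi^2/6, pi^4/90
      return 0;
    }
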
+ +#ifndef EIGEN_SPECIALFUNCTIONS_BFLOAT16_H +#define EIGEN_SPECIALFUNCTIONS_BFLOAT16_H + +namespace Eigen { +namespace numext { + +#if EIGEN_HAS_C99_MATH +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 lgamma(const Eigen::bfloat16& a) { + return Eigen::bfloat16(Eigen::numext::lgamma(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 digamma(const Eigen::bfloat16& a) { + return Eigen::bfloat16(Eigen::numext::digamma(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 zeta(const Eigen::bfloat16& x, const Eigen::bfloat16& q) { + return Eigen::bfloat16(Eigen::numext::zeta(static_cast(x), static_cast(q))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 polygamma(const Eigen::bfloat16& n, const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::polygamma(static_cast(n), static_cast(x))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 erf(const Eigen::bfloat16& a) { + return Eigen::bfloat16(Eigen::numext::erf(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 erfc(const Eigen::bfloat16& a) { + return Eigen::bfloat16(Eigen::numext::erfc(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 ndtri(const Eigen::bfloat16& a) { + return Eigen::bfloat16(Eigen::numext::ndtri(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igamma(const Eigen::bfloat16& a, const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::igamma(static_cast(a), static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igamma_der_a(const Eigen::bfloat16& a, const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::igamma_der_a(static_cast(a), static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 gamma_sample_der_alpha(const Eigen::bfloat16& alpha, const Eigen::bfloat16& sample) { + return Eigen::bfloat16(Eigen::numext::gamma_sample_der_alpha(static_cast(alpha), static_cast(sample))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igammac(const Eigen::bfloat16& a, const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::igammac(static_cast(a), static_cast(x))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 betainc(const Eigen::bfloat16& a, const Eigen::bfloat16& b, const Eigen::bfloat16& x) { + return Eigen::bfloat16(Eigen::numext::betainc(static_cast(a), static_cast(b), static_cast(x))); +} +#endif + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_BFLOAT16_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h new file mode 100644 index 0000000..abefe99 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -0,0 +1,330 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
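
The bfloat16 specializations above all follow one pattern: widen to float, evaluate the float kernel, narrow the result back. A small sketch (not part of the patch, include path assumed) showing that the bfloat16 result therefore matches the float kernel up to narrowing:

    // Illustrative only; header path is an assumption.
    #include <Eigen/Core>
    #include <unsupported/Eigen/SpecialFunctions>
    #include <cstdio>

    int main() {
      const Eigen::bfloat16 a(2.5f);
      const Eigen::bfloat16 g  = Eigen::numext::lgamma(a);   // bfloat16 overload above
      const float           gf = Eigen::numext::lgamma(2.5f); // underlying float kernel
      std::printf("lgamma(2.5): bf16=%g float=%g\n", static_cast<float>(g), gf);
      return 0;
    }
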
+ +#ifndef EIGEN_SPECIALFUNCTIONS_FUNCTORS_H +#define EIGEN_SPECIALFUNCTIONS_FUNCTORS_H + +namespace Eigen { + +namespace internal { + + +/** \internal + * \brief Template functor to compute the incomplete gamma function igamma(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma + */ +template struct scalar_igamma_op : binary_op_base +{ + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igamma; return igamma(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigamma(a, x); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGamma + }; +}; + +/** \internal + * \brief Template functor to compute the derivative of the incomplete gamma + * function igamma_der_a(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma_der_a + */ +template +struct scalar_igamma_der_a_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_der_a_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& x) const { + using numext::igamma_der_a; + return igamma_der_a(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigamma_der_a(a, x); + } +}; +template +struct functor_traits > { + enum { + // 2x the cost of igamma + Cost = 40 * NumTraits::MulCost + 20 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGammaDerA + }; +}; + +/** \internal + * \brief Template functor to compute the derivative of the sample + * of a Gamma(alpha, 1) random variable with respect to the parameter alpha + * gamma_sample_der_alpha(alpha, sample) + * + * \sa class CwiseBinaryOp, Cwise::gamma_sample_der_alpha + */ +template +struct scalar_gamma_sample_der_alpha_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_gamma_sample_der_alpha_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& alpha, const Scalar& sample) const { + using numext::gamma_sample_der_alpha; + return gamma_sample_der_alpha(alpha, sample); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& alpha, const Packet& sample) const { + return internal::pgamma_sample_der_alpha(alpha, sample); + } +}; +template +struct functor_traits > { + enum { + // 2x the cost of igamma, minus the lgamma cost (the lgamma cancels out) + Cost = 30 * NumTraits::MulCost + 15 * NumTraits::AddCost, + PacketAccess = packet_traits::HasGammaSampleDerAlpha + }; +}; + +/** \internal + * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igammac + */ +template struct scalar_igammac_op : binary_op_base +{ + EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igammac; return igammac(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const + { + return internal::pigammac(a, x); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGammac + }; +}; + + +/** \internal + * \brief Template functor to compute the incomplete beta integral betainc(a, 
b, x) + * + */ +template struct scalar_betainc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_betainc_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& a, const Scalar& b) const { + using numext::betainc; return betainc(x, a, b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& x, const Packet& a, const Packet& b) const + { + return internal::pbetainc(x, a, b); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 400 * NumTraits::MulCost + 400 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBetaInc + }; +}; + + +/** \internal + * \brief Template functor to compute the natural log of the absolute + * value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return lgamma(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute psi, the derivative of lgamma of a scalar. + * \sa class CwiseUnaryOp, Cwise::digamma() + */ +template struct scalar_digamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::digamma; return digamma(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasDiGamma + }; +}; + +/** \internal + * \brief Template functor to compute the Riemann Zeta function of two arguments. + * \sa class CwiseUnaryOp, Cwise::zeta() + */ +template struct scalar_zeta_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& q) const { + using numext::zeta; return zeta(x, q); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasZeta + }; +}; + +/** \internal + * \brief Template functor to compute the polygamma function. 
+ * \sa class CwiseUnaryOp, Cwise::polygamma() + */ +template struct scalar_polygamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& n, const Scalar& x) const { + using numext::polygamma; return polygamma(n, x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasPolygamma + }; +}; + +/** \internal + * \brief Template functor to compute the error function of a scalar + * \sa class CwiseUnaryOp, ArrayBase::erf() + */ +template struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& a) const { + return numext::erf(a); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return perf(x); + } +}; +template +struct functor_traits > { + enum { + PacketAccess = packet_traits::HasErf, + Cost = + (PacketAccess +#ifdef EIGEN_VECTORIZE_FMA + // TODO(rmlarsen): Move the FMA cost model to a central location. + // Haswell can issue 2 add/mul/madd per cycle. + // 10 pmadd, 2 pmul, 1 div, 2 other + ? (2 * NumTraits::AddCost + + 7 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) +#else + ? (12 * NumTraits::AddCost + + 12 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) +#endif + // Assume for simplicity that this is as expensive as an exp(). + : (functor_traits >::Cost)) + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function + * of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErfc + }; +}; + +/** \internal + * \brief Template functor to compute the Inverse of the normal distribution + * function of a scalar + * \sa class CwiseUnaryOp, Cwise::ndtri() + */ +template struct scalar_ndtri_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::ndtri; return ndtri(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); } +}; +template +struct functor_traits > +{ + enum { + // On average, We are evaluating rational functions with degree N=9 in the + // numerator and denominator. This results in 2*N additions and 2*N + // multiplications. 
+ Cost = 18 * NumTraits::MulCost + 18 * NumTraits::AddCost, + PacketAccess = packet_traits::HasNdtri + }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_FUNCTORS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h new file mode 100644 index 0000000..2a3a531 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -0,0 +1,58 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIALFUNCTIONS_HALF_H +#define EIGEN_SPECIALFUNCTIONS_HALF_H + +namespace Eigen { +namespace numext { + +#if EIGEN_HAS_C99_MATH +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) { + return Eigen::half(Eigen::numext::lgamma(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) { + return Eigen::half(Eigen::numext::digamma(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) { + return Eigen::half(Eigen::numext::zeta(static_cast(x), static_cast(q))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) { + return Eigen::half(Eigen::numext::polygamma(static_cast(n), static_cast(x))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) { + return Eigen::half(Eigen::numext::erf(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { + return Eigen::half(Eigen::numext::erfc(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) { + return Eigen::half(Eigen::numext::ndtri(static_cast(a))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { + return Eigen::half(Eigen::numext::igamma(static_cast(a), static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma_der_a(const Eigen::half& a, const Eigen::half& x) { + return Eigen::half(Eigen::numext::igamma_der_a(static_cast(a), static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half gamma_sample_der_alpha(const Eigen::half& alpha, const Eigen::half& sample) { + return Eigen::half(Eigen::numext::gamma_sample_der_alpha(static_cast(alpha), static_cast(sample))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { + return Eigen::half(Eigen::numext::igammac(static_cast(a), static_cast(x))); +} +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { + return Eigen::half(Eigen::numext::betainc(static_cast(a), static_cast(b), static_cast(x))); +} +#endif + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_HALF_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h new file mode 100644 index 0000000..f1c260e --- /dev/null +++ 
b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -0,0 +1,2045 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIAL_FUNCTIONS_H +#define EIGEN_SPECIAL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +// Parts of this code are based on the Cephes Math Library. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier +// +// Permission has been kindly provided by the original author +// to incorporate the Cephes software into the Eigen codebase: +// +// From: Stephen Moshier +// To: Eugene Brevdo +// Subject: Re: Permission to wrap several cephes functions in Eigen +// +// Hello Eugene, +// +// Thank you for writing. +// +// If your licensing is similar to BSD, the formal way that has been +// handled is simply to add a statement to the effect that you are incorporating +// the Cephes software by permission of the author. +// +// Good luck with your project, +// Steve + + +/**************************************************************************** + * Implementation of lgamma, requires C++11/C99 * + ****************************************************************************/ + +template +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct lgamma_retval { + typedef Scalar type; +}; + +#if EIGEN_HAS_C99_MATH +// Since glibc 2.19 +#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 19) || __GLIBC__>2) \ + && (defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) +#define EIGEN_HAS_LGAMMA_R +#endif + +// Glibc versions before 2.19 +#if defined(__GLIBC__) && ((__GLIBC__==2 && __GLIBC_MINOR__ < 19) || __GLIBC__<2) \ + && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) +#define EIGEN_HAS_LGAMMA_R +#endif + +template <> +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { +#if !defined(EIGEN_GPU_COMPILE_PHASE) && defined (EIGEN_HAS_LGAMMA_R) && !defined(__APPLE__) + int dummy; + return ::lgammaf_r(x, &dummy); +#elif defined(SYCL_DEVICE_ONLY) + return cl::sycl::lgamma(x); +#else + return ::lgammaf(x); +#endif + } +}; + +template <> +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { +#if !defined(EIGEN_GPU_COMPILE_PHASE) && defined(EIGEN_HAS_LGAMMA_R) && !defined(__APPLE__) + int dummy; + return ::lgamma_r(x, &dummy); +#elif defined(SYCL_DEVICE_ONLY) + return cl::sycl::lgamma(x); +#else + return ::lgamma(x); +#endif + } +}; + +#undef EIGEN_HAS_LGAMMA_R +#endif + +/**************************************************************************** + * Implementation of digamma (psi), based on Cephes * + ****************************************************************************/ + +template +struct digamma_retval { + typedef Scalar type; +}; + +/* + * + * Polynomial evaluation helper for the Psi (digamma) function. + * + * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for + * input Scalar s, assuming s is above 10.0. 
+ * + * If s is above a certain threshold for the given Scalar type, zero + * is returned. Otherwise the polynomial is evaluated with enough + * coefficients for results matching Scalar machine precision. + * + * + */ +template +struct digamma_impl_maybe_poly { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + + +template <> +struct digamma_impl_maybe_poly { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float s) { + const float A[] = { + -4.16666666666666666667E-3f, + 3.96825396825396825397E-3f, + -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f + }; + + float z; + if (s < 1.0e8f) { + z = 1.0f / (s * s); + return z * internal::ppolevl::run(z, A); + } else return 0.0f; + } +}; + +template <> +struct digamma_impl_maybe_poly { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double s) { + const double A[] = { + 8.33333333333333333333E-2, + -2.10927960927960927961E-2, + 7.57575757575757575758E-3, + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + double z; + if (s < 1.0e17) { + z = 1.0 / (s * s); + return z * internal::ppolevl::run(z, A); + } + else return 0.0; + } +}; + +template +struct digamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x) { + /* + * + * Psi (digamma) function (modified for Eigen) + * + * + * SYNOPSIS: + * + * double x, y, psi(); + * + * y = psi( x ); + * + * + * DESCRIPTION: + * + * d - + * psi(x) = -- ln | (x) + * dx + * + * is the logarithmic derivative of the gamma function. + * For integer x, + * n-1 + * - + * psi(n) = -EUL + > 1/k. + * - + * k=1 + * + * If x is negative, it is transformed to a positive argument by the + * reflection formula psi(1-x) = psi(x) + pi cot(pi x). + * For general positive x, the argument is made greater than 10 + * using the recurrence psi(x+1) = psi(x) + 1/x. + * Then the following asymptotic expansion is applied: + * + * inf. B + * - 2k + * psi(x) = log(x) - 1/2x - > ------- + * - 2k + * k=1 2k x + * + * where the B2k are Bernoulli numbers. + * + * ACCURACY (float): + * Relative error (except absolute when |psi| < 1): + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 1.3e-15 1.4e-16 + * IEEE -30,0 40000 1.5e-15 2.2e-16 + * + * ACCURACY (double): + * Absolute error, relative when |psi| > 1 : + * arithmetic domain # trials peak rms + * IEEE -33,0 30000 8.2e-7 1.2e-7 + * IEEE 0,33 100000 7.3e-7 7.7e-8 + * + * ERROR MESSAGES: + * message condition value returned + * psi singularity x integer <=0 INFINITY + */ + + Scalar p, q, nz, s, w, y; + bool negative = false; + + const Scalar nan = NumTraits::quiet_NaN(); + const Scalar m_pi = Scalar(EIGEN_PI); + + const Scalar zero = Scalar(0); + const Scalar one = Scalar(1); + const Scalar half = Scalar(0.5); + nz = zero; + + if (x <= zero) { + negative = true; + q = x; + p = numext::floor(q); + if (p == q) { + return nan; + } + /* Remove the zeros of tan(m_pi x) + * by subtracting the nearest integer from x + */ + nz = q - p; + if (nz != half) { + if (nz > half) { + p += one; + nz = q - p; + } + nz = m_pi / numext::tan(m_pi * nz); + } + else { + nz = zero; + } + x = one - x; + } + + /* use the recurrence psi(x+1) = psi(x) + 1/x. */ + s = x; + w = zero; + while (s < Scalar(10)) { + w += one / s; + s += one; + } + + y = digamma_impl_maybe_poly::run(s); + + y = numext::log(s) - (half / s) - y - w; + + return (negative) ? 
y - nz : y; + } +}; + +/**************************************************************************** + * Implementation of erf, requires C++11/C99 * + ****************************************************************************/ + +/** \internal \returns the error function of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/8-degree rational interpolant which + is accurate up to a couple of ulp in the range [-4, 4], outside of which + fl(erf(x)) = +/-1. + + This implementation works on both scalars and Ts. +*/ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) { + // Clamp the inputs to the range [-4, 4] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_4 = pset1(4.f); + const T minus_4 = pset1(-4.f); + const T x = pmax(pmin(a_x, plus_4), minus_4); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(-1.60960333262415e-02f); + const T alpha_3 = pset1(-2.95459980854025e-03f); + const T alpha_5 = pset1(-7.34990630326855e-04f); + const T alpha_7 = pset1(-5.69250639462346e-05f); + const T alpha_9 = pset1(-2.10102402082508e-06f); + const T alpha_11 = pset1(2.77068142495902e-08f); + const T alpha_13 = pset1(-2.72614225801306e-10f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(-1.42647390514189e-02f); + const T beta_2 = pset1(-7.37332916720468e-03f); + const T beta_4 = pset1(-1.68282697438203e-03f); + const T beta_6 = pset1(-2.13374055278905e-04f); + const T beta_8 = pset1(-1.45660718464996e-05f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_8, beta_6); + q = pmadd(x2, q, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. 
+ return pdiv(p, q); +} + +template +struct erf_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + return generic_fast_erf_float(x); + } +}; + +template +struct erf_retval { + typedef Scalar type; +}; + +#if EIGEN_HAS_C99_MATH +template <> +struct erf_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { +#if defined(SYCL_DEVICE_ONLY) + return cl::sycl::erf(x); +#else + return generic_fast_erf_float(x); +#endif + } +}; + +template <> +struct erf_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { +#if defined(SYCL_DEVICE_ONLY) + return cl::sycl::erf(x); +#else + return ::erf(x); +#endif + } +}; +#endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of erfc, requires C++11/C99 * +****************************************************************************/ + +template +struct erfc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct erfc_retval { + typedef Scalar type; +}; + +#if EIGEN_HAS_C99_MATH +template <> +struct erfc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float x) { +#if defined(SYCL_DEVICE_ONLY) + return cl::sycl::erfc(x); +#else + return ::erfcf(x); +#endif + } +}; + +template <> +struct erfc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double x) { +#if defined(SYCL_DEVICE_ONLY) + return cl::sycl::erfc(x); +#else + return ::erfc(x); +#endif + } +}; +#endif // EIGEN_HAS_C99_MATH + + +/*************************************************************************** +* Implementation of ndtri. * +****************************************************************************/ + +/* Inverse of Normal distribution function (modified for Eigen). + * + * + * SYNOPSIS: + * + * double x, y, ndtri(); + * + * x = ndtri( y ); + * + * + * + * DESCRIPTION: + * + * Returns the argument, x, for which the area under the + * Gaussian probability density function (integrated from + * minus infinity to x) is equal to y. + * + * + * For small arguments 0 < y < exp(-2), the program computes + * z = sqrt( -2.0 * log(y) ); then the approximation is + * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z). + * There are two rational functions P/Q, one for 0 < y < exp(-32) + * and the other for y up to exp(-2). For larger arguments, + * w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)). + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * DEC 0.125, 1 5500 9.5e-17 2.1e-17 + * DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17 + * IEEE 0.125, 1 20000 7.2e-16 1.3e-16 + * IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17 + * + * + * ERROR MESSAGES: + * + * message condition value returned + * ndtri domain x <= 0 -MAXNUM + * ndtri domain x >= 1 MAXNUM + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + + +// TODO: Add a cheaper approximation for float. 
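
As the description above states, ndtri returns the argument at which the standard normal CDF equals the input. A quick round-trip sketch against an erfc-based CDF (not part of the patch; include path assumed):

    // Illustrative only; header path is an assumption.
    #include <unsupported/Eigen/SpecialFunctions>
    #include <cmath>
    #include <cstdio>

    static double normal_cdf(double x) {
      return 0.5 * std::erfc(-x / std::sqrt(2.0));
    }

    int main() {
      std::printf("ndtri(0.5)   = %g (should be 0)\n", Eigen::numext::ndtri(0.5));
      std::printf("ndtri(0.975) = %g (~1.95996)\n", Eigen::numext::ndtri(0.975));
      const double x = 1.234;
      std::printf("round trip   = %g (should be ~%g)\n",
                  Eigen::numext::ndtri(normal_cdf(x)), x);
      return 0;
    }
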
+ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign( + const T& should_flipsign, const T& x) { + typedef typename unpacket_traits::type Scalar; + const T sign_mask = pset1(Scalar(-0.0)); + T sign_bit = pand(should_flipsign, sign_mask); + return pxor(sign_bit, x); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign( + const double& should_flipsign, const double& x) { + return should_flipsign == 0 ? x : -x; +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign( + const float& should_flipsign, const float& x) { + return should_flipsign == 0 ? x : -x; +} + +// We split this computation in to two so that in the scalar path +// only one branch is evaluated (due to our template specialization of pselect +// being an if statement.) + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) { + const ScalarType p0[] = { + ScalarType(-5.99633501014107895267e1), + ScalarType(9.80010754185999661536e1), + ScalarType(-5.66762857469070293439e1), + ScalarType(1.39312609387279679503e1), + ScalarType(-1.23916583867381258016e0) + }; + const ScalarType q0[] = { + ScalarType(1.0), + ScalarType(1.95448858338141759834e0), + ScalarType(4.67627912898881538453e0), + ScalarType(8.63602421390890590575e1), + ScalarType(-2.25462687854119370527e2), + ScalarType(2.00260212380060660359e2), + ScalarType(-8.20372256168333339912e1), + ScalarType(1.59056225126211695515e1), + ScalarType(-1.18331621121330003142e0) + }; + const T sqrt2pi = pset1(ScalarType(2.50662827463100050242e0)); + const T half = pset1(ScalarType(0.5)); + T c, c2, ndtri_gt_exp_neg_two; + + c = psub(b, half); + c2 = pmul(c, c); + ndtri_gt_exp_neg_two = pmadd(c, pmul( + c2, pdiv( + internal::ppolevl::run(c2, p0), + internal::ppolevl::run(c2, q0))), c); + return pmul(ndtri_gt_exp_neg_two, sqrt2pi); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two( + const T& b, const T& should_flipsign) { + /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8 + * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14. + */ + const ScalarType p1[] = { + ScalarType(4.05544892305962419923e0), + ScalarType(3.15251094599893866154e1), + ScalarType(5.71628192246421288162e1), + ScalarType(4.40805073893200834700e1), + ScalarType(1.46849561928858024014e1), + ScalarType(2.18663306850790267539e0), + ScalarType(-1.40256079171354495875e-1), + ScalarType(-3.50424626827848203418e-2), + ScalarType(-8.57456785154685413611e-4) + }; + const ScalarType q1[] = { + ScalarType(1.0), + ScalarType(1.57799883256466749731e1), + ScalarType(4.53907635128879210584e1), + ScalarType(4.13172038254672030440e1), + ScalarType(1.50425385692907503408e1), + ScalarType(2.50464946208309415979e0), + ScalarType(-1.42182922854787788574e-1), + ScalarType(-3.80806407691578277194e-2), + ScalarType(-9.33259480895457427372e-4) + }; + /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64 + * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. 
+ */ + const ScalarType p2[] = { + ScalarType(3.23774891776946035970e0), + ScalarType(6.91522889068984211695e0), + ScalarType(3.93881025292474443415e0), + ScalarType(1.33303460815807542389e0), + ScalarType(2.01485389549179081538e-1), + ScalarType(1.23716634817820021358e-2), + ScalarType(3.01581553508235416007e-4), + ScalarType(2.65806974686737550832e-6), + ScalarType(6.23974539184983293730e-9) + }; + const ScalarType q2[] = { + ScalarType(1.0), + ScalarType(6.02427039364742014255e0), + ScalarType(3.67983563856160859403e0), + ScalarType(1.37702099489081330271e0), + ScalarType(2.16236993594496635890e-1), + ScalarType(1.34204006088543189037e-2), + ScalarType(3.28014464682127739104e-4), + ScalarType(2.89247864745380683936e-6), + ScalarType(6.79019408009981274425e-9) + }; + const T eight = pset1(ScalarType(8.0)); + const T one = pset1(ScalarType(1)); + const T neg_two = pset1(ScalarType(-2)); + T x, x0, x1, z; + + x = psqrt(pmul(neg_two, plog(b))); + x0 = psub(x, pdiv(plog(x), x)); + z = pdiv(one, x); + x1 = pmul( + z, pselect( + pcmp_lt(x, eight), + pdiv(internal::ppolevl::run(z, p1), + internal::ppolevl::run(z, q1)), + pdiv(internal::ppolevl::run(z, p2), + internal::ppolevl::run(z, q2)))); + return flipsign(should_flipsign, psub(x0, x1)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T generic_ndtri(const T& a) { + const T maxnum = pset1(NumTraits::infinity()); + const T neg_maxnum = pset1(-NumTraits::infinity()); + + const T zero = pset1(ScalarType(0)); + const T one = pset1(ScalarType(1)); + // exp(-2) + const T exp_neg_two = pset1(ScalarType(0.13533528323661269189)); + T b, ndtri, should_flipsign; + + should_flipsign = pcmp_le(a, psub(one, exp_neg_two)); + b = pselect(should_flipsign, a, psub(one, a)); + + ndtri = pselect( + pcmp_lt(exp_neg_two, b), + generic_ndtri_gt_exp_neg_two(b), + generic_ndtri_lt_exp_neg_two(b, should_flipsign)); + + return pselect( + pcmp_le(a, zero), neg_maxnum, + pselect(pcmp_le(one, a), maxnum, ndtri)); +} + +template +struct ndtri_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +# else + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_ndtri(x); + } +}; + +#endif // EIGEN_HAS_C99_MATH + + +/************************************************************************************************************** + * Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 * + **************************************************************************************************************/ + +template +struct igammac_retval { + typedef Scalar type; +}; + +// NOTE: cephes_helper is also used to implement zeta +template +struct cephes_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; } +}; + +template <> +struct cephes_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float machep() { + return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 + } + 
EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float big() { + // use epsneg (1.0 - epsneg == 1.0) + return 1.0f / (NumTraits::epsilon() / 2); + } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float biginv() { + // epsneg + return machep(); + } +}; + +template <> +struct cephes_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double machep() { + return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 + } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double big() { + return 1.0 / NumTraits::epsilon(); + } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double biginv() { + // inverse of eps + return NumTraits::epsilon(); + } +}; + +enum IgammaComputationMode { VALUE, DERIVATIVE, SAMPLE_DERIVATIVE }; + +template +EIGEN_DEVICE_FUNC +static EIGEN_STRONG_INLINE Scalar main_igamma_term(Scalar a, Scalar x) { + /* Compute x**a * exp(-x) / gamma(a) */ + Scalar logax = a * numext::log(x) - x - lgamma_impl::run(a); + if (logax < -numext::log(NumTraits::highest()) || + // Assuming x and a aren't Nan. + (numext::isnan)(logax)) { + return Scalar(0); + } + return numext::exp(logax); +} + +template +EIGEN_DEVICE_FUNC +int igamma_num_iterations() { + /* Returns the maximum number of internal iterations for igamma computation. + */ + if (mode == VALUE) { + return 2000; + } + + if (internal::is_same::value) { + return 200; + } else if (internal::is_same::value) { + return 500; + } else { + return 2000; + } +} + +template +struct igammac_cf_impl { + /* Computes igamc(a, x) or derivative (depending on the mode) + * using the continued fraction expansion of the complementary + * incomplete Gamma function. + * + * Preconditions: + * a > 0 + * x >= 1 + * x >= a + */ + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; + const Scalar two = 2; + const Scalar machep = cephes_helper::machep(); + const Scalar big = cephes_helper::big(); + const Scalar biginv = cephes_helper::biginv(); + + if ((numext::isinf)(x)) { + return zero; + } + + Scalar ax = main_igamma_term(a, x); + // This is independent of mode. If this value is zero, + // then the function value is zero. If the function value is zero, + // then we are in a neighborhood where the function value evalutes to zero, + // so the derivative is zero. 
+ if (ax == zero) { + return zero; + } + + // continued fraction + Scalar y = one - a; + Scalar z = x + y + one; + Scalar c = zero; + Scalar pkm2 = one; + Scalar qkm2 = x; + Scalar pkm1 = x + one; + Scalar qkm1 = z * x; + Scalar ans = pkm1 / qkm1; + + Scalar dpkm2_da = zero; + Scalar dqkm2_da = zero; + Scalar dpkm1_da = zero; + Scalar dqkm1_da = -x; + Scalar dans_da = (dpkm1_da - ans * dqkm1_da) / qkm1; + + for (int i = 0; i < igamma_num_iterations(); i++) { + c += one; + y += one; + z += two; + + Scalar yc = y * c; + Scalar pk = pkm1 * z - pkm2 * yc; + Scalar qk = qkm1 * z - qkm2 * yc; + + Scalar dpk_da = dpkm1_da * z - pkm1 - dpkm2_da * yc + pkm2 * c; + Scalar dqk_da = dqkm1_da * z - qkm1 - dqkm2_da * yc + qkm2 * c; + + if (qk != zero) { + Scalar ans_prev = ans; + ans = pk / qk; + + Scalar dans_da_prev = dans_da; + dans_da = (dpk_da - ans * dqk_da) / qk; + + if (mode == VALUE) { + if (numext::abs(ans_prev - ans) <= machep * numext::abs(ans)) { + break; + } + } else { + if (numext::abs(dans_da - dans_da_prev) <= machep) { + break; + } + } + } + + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + dpkm2_da = dpkm1_da; + dpkm1_da = dpk_da; + dqkm2_da = dqkm1_da; + dqkm1_da = dqk_da; + + if (numext::abs(pk) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + + dpkm2_da *= biginv; + dpkm1_da *= biginv; + dqkm2_da *= biginv; + dqkm1_da *= biginv; + } + } + + /* Compute x**a * exp(-x) / gamma(a) */ + Scalar dlogax_da = numext::log(x) - digamma_impl::run(a); + Scalar dax_da = ax * dlogax_da; + + switch (mode) { + case VALUE: + return ans * ax; + case DERIVATIVE: + return ans * dax_da + dans_da * ax; + case SAMPLE_DERIVATIVE: + default: // this is needed to suppress clang warning + return -(dans_da + ans * dlogax_da) * x; + } + } +}; + +template +struct igamma_series_impl { + /* Computes igam(a, x) or its derivative (depending on the mode) + * using the series expansion of the incomplete Gamma function. + * + * Preconditions: + * x > 0 + * a > 0 + * !(x > 1 && x > a) + */ + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; + const Scalar machep = cephes_helper::machep(); + + Scalar ax = main_igamma_term(a, x); + + // This is independent of mode. If this value is zero, + // then the function value is zero. If the function value is zero, + // then we are in a neighborhood where the function value evalutes to zero, + // so the derivative is zero. 
+ if (ax == zero) { + return zero; + } + + ax /= a; + + /* power series */ + Scalar r = a; + Scalar c = one; + Scalar ans = one; + + Scalar dc_da = zero; + Scalar dans_da = zero; + + for (int i = 0; i < igamma_num_iterations(); i++) { + r += one; + Scalar term = x / r; + Scalar dterm_da = -x / (r * r); + dc_da = term * dc_da + dterm_da * c; + dans_da += dc_da; + c *= term; + ans += c; + + if (mode == VALUE) { + if (c <= machep * ans) { + break; + } + } else { + if (numext::abs(dc_da) <= machep * numext::abs(dans_da)) { + break; + } + } + } + + Scalar dlogax_da = numext::log(x) - digamma_impl::run(a + one); + Scalar dax_da = ax * dlogax_da; + + switch (mode) { + case VALUE: + return ans * ax; + case DERIVATIVE: + return ans * dax_da + dans_da * ax; + case SAMPLE_DERIVATIVE: + default: // this is needed to suppress clang warning + return -(dans_da + ans * dlogax_da) * x / a; + } + } +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct igammac_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct igammac_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + /* igamc() + * + * Incomplete gamma integral (modified for Eigen) + * + * + * + * SYNOPSIS: + * + * double a, x, y, igamc(); + * + * y = igamc( a, x ); + * + * DESCRIPTION: + * + * The function is defined by + * + * + * igamc(a,x) = 1 - igam(a,x) + * + * inf. + * - + * 1 | | -t a-1 + * = ----- | e t dt. + * - | | + * | (a) - + * x + * + * + * In this implementation both arguments must be positive. + * The integral is evaluated by either a power series or + * continued fraction expansion, depending on the relative + * values of a and x. + * + * ACCURACY (float): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 7.8e-6 5.9e-7 + * + * + * ACCURACY (double): + * + * Tested at random a, x. + * a x Relative error: + * arithmetic domain domain # trials peak rms + * IEEE 0.5,100 0,100 200000 1.9e-14 1.7e-15 + * IEEE 0.01,0.5 0,100 200000 1.4e-13 1.6e-15 + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. 
Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + const Scalar zero = 0; + const Scalar one = 1; + const Scalar nan = NumTraits::quiet_NaN(); + + if ((x < zero) || (a <= zero)) { + // domain error + return nan; + } + + if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans + return nan; + } + + if ((x < one) || (x < a)) { + return (one - igamma_series_impl::run(a, x)); + } + + return igammac_cf_impl::run(a, x); + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/************************************************************************************************ + * Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 * + ************************************************************************************************/ + +#if !EIGEN_HAS_C99_MATH + +template +struct igamma_generic_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct igamma_generic_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar a, Scalar x) { + /* Depending on the mode, returns + * - VALUE: incomplete Gamma function igamma(a, x) + * - DERIVATIVE: derivative of incomplete Gamma function d/da igamma(a, x) + * - SAMPLE_DERIVATIVE: implicit derivative of a Gamma random variable + * x ~ Gamma(x | a, 1), dx/da = -1 / Gamma(x | a, 1) * d igamma(a, x) / dx + * + * Derivatives are implemented by forward-mode differentiation. + */ + const Scalar zero = 0; + const Scalar one = 1; + const Scalar nan = NumTraits::quiet_NaN(); + + if (x == zero) return zero; + + if ((x < zero) || (a <= zero)) { // domain error + return nan; + } + + if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans + return nan; + } + + if ((x > one) && (x > a)) { + Scalar ret = igammac_cf_impl::run(a, x); + if (mode == VALUE) { + return one - ret; + } else { + return -ret; + } + } + + return igamma_series_impl::run(a, x); + } +}; + +#endif // EIGEN_HAS_C99_MATH + +template +struct igamma_retval { + typedef Scalar type; +}; + +template +struct igamma_impl : igamma_generic_impl { + /* igam() + * Incomplete gamma integral. + * + * The CDF of Gamma(a, 1) random variable at the point x. + * + * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points. + * The ground truth is computed by mpmath. Mean absolute error: + * float: 1.26713e-05 + * double: 2.33606e-12 + * + * Cephes documentation below. + * + * SYNOPSIS: + * + * double a, x, y, igam(); + * + * y = igam( a, x ); + * + * DESCRIPTION: + * + * The function is defined by + * + * x + * - + * 1 | | -t a-1 + * igam(a,x) = ----- | e t dt. + * - | | + * | (a) - + * 0 + * + * + * In this implementation both arguments must be positive. + * The integral is evaluated by either a power series or + * continued fraction expansion, depending on the relative + * values of a and x. + * + * ACCURACY (double): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 200000 3.6e-14 2.9e-15 + * IEEE 0,100 300000 9.9e-14 1.5e-14 + * + * + * ACCURACY (float): + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 20000 7.8e-6 5.9e-7 + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. 
Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + + /* left tail of incomplete gamma function: + * + * inf. k + * a -x - x + * x e > ---------- + * - - + * k=0 | (a+k+1) + * + */ +}; + +template +struct igamma_der_a_retval : igamma_retval {}; + +template +struct igamma_der_a_impl : igamma_generic_impl { + /* Derivative of the incomplete Gamma function with respect to a. + * + * Computes d/da igamma(a, x) by forward differentiation of the igamma code. + * + * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points. + * The ground truth is computed by mpmath. Mean absolute error: + * float: 6.17992e-07 + * double: 4.60453e-12 + * + * Reference: + * R. Moore. "Algorithm AS 187: Derivatives of the incomplete gamma + * integral". Journal of the Royal Statistical Society. 1982 + */ +}; + +template +struct gamma_sample_der_alpha_retval : igamma_retval {}; + +template +struct gamma_sample_der_alpha_impl + : igamma_generic_impl { + /* Derivative of a Gamma random variable sample with respect to alpha. + * + * Consider a sample of a Gamma random variable with the concentration + * parameter alpha: sample ~ Gamma(alpha, 1). The reparameterization + * derivative that we want to compute is dsample / dalpha = + * d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample). + * However, this formula is numerically unstable and expensive, so instead + * we use implicit differentiation: + * + * igamma(alpha, sample) = u, where u ~ Uniform(0, 1). + * Apply d / dalpha to both sides: + * d igamma(alpha, sample) / dalpha + * + d igamma(alpha, sample) / dsample * dsample/dalpha = 0 + * d igamma(alpha, sample) / dalpha + * + Gamma(sample | alpha, 1) dsample / dalpha = 0 + * dsample/dalpha = - (d igamma(alpha, sample) / dalpha) + * / Gamma(sample | alpha, 1) + * + * Here Gamma(sample | alpha, 1) is the PDF of the Gamma distribution + * (note that the derivative of the CDF w.r.t. sample is the PDF). + * See the reference below for more details. + * + * The derivative of igamma(alpha, sample) is computed by forward + * differentiation of the igamma code. Division by the Gamma PDF is performed + * in the same code, increasing the accuracy and speed due to cancellation + * of some terms. + * + * Accuracy estimation. For each alpha in [10^-2, 10^-1...10^3] we sample + * 50 Gamma random variables sample ~ Gamma(sample | alpha, 1), a total of 300 + * points. The ground truth is computed by mpmath. Mean absolute error: + * float: 2.1686e-06 + * double: 1.4774e-12 + * + * Reference: + * M. Figurnov, S. Mohamed, A. Mnih "Implicit Reparameterization Gradients". 
+ * 2018 + */ +}; + +/***************************************************************************** + * Implementation of Riemann zeta function of two arguments, based on Cephes * + *****************************************************************************/ + +template +struct zeta_retval { + typedef Scalar type; +}; + +template +struct zeta_impl_series { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <> +struct zeta_impl_series { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) { + int i = 0; + while(i < 9) + { + i += 1; + a += 1.0f; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; + +template <> +struct zeta_impl_series { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) { + int i = 0; + while( (i < 9) || (a <= 9.0) ) + { + i += 1; + a += 1.0; + b = numext::pow( a, -x ); + s += b; + if( numext::abs(b/s) < machep ) + return true; + } + + //Return whether we are done + return false; + } +}; + +template +struct zeta_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x, Scalar q) { + /* zeta.c + * + * Riemann zeta function of two arguments + * + * + * + * SYNOPSIS: + * + * double x, q, y, zeta(); + * + * y = zeta( x, q ); + * + * + * + * DESCRIPTION: + * + * + * + * inf. + * - -x + * zeta(x,q) = > (k+q) + * - + * k=0 + * + * where x > 1 and q is not a negative integer or zero. + * The Euler-Maclaurin summation formula is used to obtain + * the expansion + * + * n + * - -x + * zeta(x,q) = > (k+q) + * - + * k=1 + * + * 1-x inf. B x(x+1)...(x+2j) + * (n+q) 1 - 2j + * + --------- - ------- + > -------------------- + * x-1 x - x+2j+1 + * 2(n+q) j=1 (2j)! (n+q) + * + * where the B2j are Bernoulli numbers. Note that (see zetac.c) + * zeta(x,1) = zetac(x) + 1. + * + * + * + * ACCURACY: + * + * Relative error for single precision: + * arithmetic domain # trials peak rms + * IEEE 0,25 10000 6.9e-7 1.0e-7 + * + * Large arguments may produce underflow in powf(), in which + * case the results are inaccurate. + * + * REFERENCE: + * + * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals, + * Series, and Products, p. 1073; Academic Press, 1980. 
+ * + */ + + int i; + Scalar p, r, a, b, k, s, t, w; + + const Scalar A[] = { + Scalar(12.0), + Scalar(-720.0), + Scalar(30240.0), + Scalar(-1209600.0), + Scalar(47900160.0), + Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/ + Scalar(7.47242496e10), + Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/ + Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/ + Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/ + Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/ + Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/ + }; + + const Scalar maxnum = NumTraits::infinity(); + const Scalar zero = 0.0, half = 0.5, one = 1.0; + const Scalar machep = cephes_helper::machep(); + const Scalar nan = NumTraits::quiet_NaN(); + + if( x == one ) + return maxnum; + + if( x < one ) + { + return nan; + } + + if( q <= zero ) + { + if(q == numext::floor(q)) + { + if (x == numext::floor(x) && long(x) % 2 == 0) { + return maxnum; + } + else { + return nan; + } + } + p = x; + r = numext::floor(p); + if (p != r) + return nan; + } + + /* Permit negative q but continue sum until n+q > +9 . + * This case should be handled by a reflection formula. + * If q<0 and x is an integer, there is a relation to + * the polygamma function. + */ + s = numext::pow( q, -x ); + a = q; + b = zero; + // Run the summation in a helper function that is specific to the floating precision + if (zeta_impl_series::run(a, b, s, x, machep)) { + return s; + } + + w = a; + s += b*w/(x-one); + s -= half * b; + a = one; + k = zero; + for( i=0; i<12; i++ ) + { + a *= x + k; + b /= w; + t = a*b/A[i]; + s = s + t; + t = numext::abs(t/s); + if( t < machep ) { + break; + } + k += one; + a *= x + k; + b /= w; + k += one; + } + return s; + } +}; + +/**************************************************************************** + * Implementation of polygamma function, requires C++11/C99 * + ****************************************************************************/ + +template +struct polygamma_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct polygamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar n, Scalar x) { + Scalar zero = 0.0, one = 1.0; + Scalar nplus = n + one; + const Scalar nan = NumTraits::quiet_NaN(); + + // Check that n is a non-negative integer + if (numext::floor(n) != n || n < zero) { + return nan; + } + // Just return the digamma function for n = 0 + else if (n == zero) { + return digamma_impl::run(x); + } + // Use the same implementation as scipy + else { + Scalar factorial = numext::exp(lgamma_impl::run(nplus)); + return numext::pow(-one, nplus) * factorial * zeta_impl::run(nplus, x); + } + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/************************************************************************************************ + * Implementation of betainc (incomplete beta integral), based on Cephes but requires C++11/C99 * + ************************************************************************************************/ + +template +struct betainc_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { + 
EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) { + /* betaincf.c + * + * Incomplete beta integral + * + * + * SYNOPSIS: + * + * float a, b, x, y, betaincf(); + * + * y = betaincf( a, b, x ); + * + * + * DESCRIPTION: + * + * Returns incomplete beta integral of the arguments, evaluated + * from zero to x. The function is defined as + * + * x + * - - + * | (a+b) | | a-1 b-1 + * ----------- | t (1-t) dt. + * - - | | + * | (a) | (b) - + * 0 + * + * The domain of definition is 0 <= x <= 1. In this + * implementation a and b are restricted to positive values. + * The integral from x to 1 may be obtained by the symmetry + * relation + * + * 1 - betainc( a, b, x ) = betainc( b, a, 1-x ). + * + * The integral is evaluated by a continued fraction expansion. + * If a < 1, the function calls itself recursively after a + * transformation to increase a to a+1. + * + * ACCURACY (float): + * + * Tested at random points (a,b,x) with a and b in the indicated + * interval and x between 0 and 1. + * + * arithmetic domain # trials peak rms + * Relative error: + * IEEE 0,30 10000 3.7e-5 5.1e-6 + * IEEE 0,100 10000 1.7e-4 2.5e-5 + * The useful domain for relative error is limited by underflow + * of the single precision exponential function. + * Absolute error: + * IEEE 0,30 100000 2.2e-5 9.6e-7 + * IEEE 0,100 10000 6.5e-5 3.7e-6 + * + * Larger errors may occur for extreme ratios of a and b. + * + * ACCURACY (double): + * arithmetic domain # trials peak rms + * IEEE 0,5 10000 6.9e-15 4.5e-16 + * IEEE 0,85 250000 2.2e-13 1.7e-14 + * IEEE 0,1000 30000 5.3e-12 6.3e-13 + * IEEE 0,10000 250000 9.3e-11 7.1e-12 + * IEEE 0,100000 10000 8.7e-10 4.8e-11 + * Outputs smaller than the IEEE gradual underflow threshold + * were excluded from these statistics. + * + * ERROR MESSAGES: + * message condition value returned + * incbet domain x<0, x>1 nan + * incbet underflow nan + */ + + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +/* Continued fraction expansion #1 for incomplete beta integral (small_branch = True) + * Continued fraction expansion #2 for incomplete beta integral (small_branch = False) + */ +template +struct incbeta_cfe { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x, bool small_branch) { + EIGEN_STATIC_ASSERT((internal::is_same::value || + internal::is_same::value), + THIS_TYPE_IS_NOT_SUPPORTED); + const Scalar big = cephes_helper::big(); + const Scalar machep = cephes_helper::machep(); + const Scalar biginv = cephes_helper::biginv(); + + const Scalar zero = 0; + const Scalar one = 1; + const Scalar two = 2; + + Scalar xk, pk, pkm1, pkm2, qk, qkm1, qkm2; + Scalar k1, k2, k3, k4, k5, k6, k7, k8, k26update; + Scalar ans; + int n; + + const int num_iters = (internal::is_same::value) ? 100 : 300; + const Scalar thresh = + (internal::is_same::value) ? machep : Scalar(3) * machep; + Scalar r = (internal::is_same::value) ? 
zero : one; + + if (small_branch) { + k1 = a; + k2 = a + b; + k3 = a; + k4 = a + one; + k5 = one; + k6 = b - one; + k7 = k4; + k8 = a + two; + k26update = one; + } else { + k1 = a; + k2 = b - one; + k3 = a; + k4 = a + one; + k5 = one; + k6 = a + b; + k7 = a + one; + k8 = a + two; + k26update = -one; + x = x / (one - x); + } + + pkm2 = zero; + qkm2 = one; + pkm1 = one; + qkm1 = one; + ans = one; + n = 0; + + do { + xk = -(x * k1 * k2) / (k3 * k4); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + xk = (x * k5 * k6) / (k7 * k8); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + if (qk != zero) { + r = pk / qk; + if (numext::abs(ans - r) < numext::abs(r) * thresh) { + return r; + } + ans = r; + } + + k1 += one; + k2 += k26update; + k3 += two; + k4 += two; + k5 += one; + k6 -= k26update; + k7 += two; + k8 += two; + + if ((numext::abs(qk) + numext::abs(pk)) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + } + if ((numext::abs(qk) < biginv) || (numext::abs(pk) < biginv)) { + pkm2 *= big; + pkm1 *= big; + qkm2 *= big; + qkm1 *= big; + } + } while (++n < num_iters); + + return ans; + } +}; + +/* Helper functions depending on the Scalar type */ +template +struct betainc_helper {}; + +template <> +struct betainc_helper { + /* Core implementation, assumes a large (> 1.0) */ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float incbsa(float aa, float bb, + float xx) { + float ans, a, b, t, x, onemx; + bool reversed_a_b = false; + + onemx = 1.0f - xx; + + /* see if x is greater than the mean */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + t = xx; + x = onemx; + } else { + a = aa; + b = bb; + t = onemx; + x = xx; + } + + /* Choose expansion for optimal convergence */ + if (b > 10.0f) { + if (numext::abs(b * x / a) < 0.3f) { + t = betainc_helper::incbps(a, b, x); + if (reversed_a_b) t = 1.0f - t; + return t; + } + } + + ans = x * (a + b - 2.0f) / (a - 1.0f); + if (ans < 1.0f) { + ans = incbeta_cfe::run(a, b, x, true /* small_branch */); + t = b * numext::log(t); + } else { + ans = incbeta_cfe::run(a, b, x, false /* small_branch */); + t = (b - 1.0f) * numext::log(t); + } + + t += a * numext::log(x) + lgamma_impl::run(a + b) - + lgamma_impl::run(a) - lgamma_impl::run(b); + t += numext::log(ans / a); + t = numext::exp(t); + + if (reversed_a_b) t = 1.0f - t; + return t; + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float incbps(float a, float b, float x) { + float t, u, y, s; + const float machep = cephes_helper::machep(); + + y = a * numext::log(x) + (b - 1.0f) * numext::log1p(-x) - numext::log(a); + y -= lgamma_impl::run(a) + lgamma_impl::run(b); + y += lgamma_impl::run(a + b); + + t = x / (1.0f - x); + s = 0.0f; + u = 1.0f; + do { + b -= 1.0f; + if (b == 0.0f) { + break; + } + a += 1.0f; + u *= t * b / a; + s += u; + } while (numext::abs(u) > machep); + + return numext::exp(y) * (1.0f + s); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static float run(float a, float b, float x) { + const float nan = NumTraits::quiet_NaN(); + float ans, t; + + if (a <= 0.0f) return nan; + if (b <= 0.0f) return nan; + if ((x <= 0.0f) || (x >= 1.0f)) { + if (x == 0.0f) return 0.0f; + if (x == 1.0f) return 1.0f; + // mtherr("betaincf", DOMAIN); + return nan; + } + + /* transformation for small aa */ + if (a <= 1.0f) { + ans = betainc_helper::incbsa(a + 1.0f, b, x); + t = a * numext::log(x) + b * numext::log1p(-x) 
+ + lgamma_impl::run(a + b) - lgamma_impl::run(a + 1.0f) - + lgamma_impl::run(b); + return (ans + numext::exp(t)); + } else { + return betainc_helper::incbsa(a, b, x); + } + } +}; + +template <> +struct betainc_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double incbps(double a, double b, double x) { + const double machep = cephes_helper::machep(); + + double s, t, u, v, n, t1, z, ai; + + ai = 1.0 / a; + u = (1.0 - b) * x; + v = u / (a + 1.0); + t1 = v; + t = u; + n = 2.0; + s = 0.0; + z = machep * ai; + while (numext::abs(v) > z) { + u = (n - b) * x / n; + t *= u; + v = t / (a + n); + s += v; + n += 1.0; + } + s += t1; + s += ai; + + u = a * numext::log(x); + // TODO: gamma() is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(u) < maxlog) { + t = gamma(a + b) / (gamma(a) * gamma(b)); + s = s * t * pow(x, a); + } + */ + t = lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b) + u + numext::log(s); + return s = numext::exp(t); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static double run(double aa, double bb, double xx) { + const double nan = NumTraits::quiet_NaN(); + const double machep = cephes_helper::machep(); + // const double maxgam = 171.624376956302725; + + double a, b, t, x, xc, w, y; + bool reversed_a_b = false; + + if (aa <= 0.0 || bb <= 0.0) { + return nan; // goto domerr; + } + + if ((xx <= 0.0) || (xx >= 1.0)) { + if (xx == 0.0) return (0.0); + if (xx == 1.0) return (1.0); + // mtherr("incbet", DOMAIN); + return nan; + } + + if ((bb * xx) <= 1.0 && xx <= 0.95) { + return betainc_helper::incbps(aa, bb, xx); + } + + w = 1.0 - xx; + + /* Reverse a and b if x is greater than the mean. */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + xc = xx; + x = w; + } else { + a = aa; + b = bb; + xc = w; + x = xx; + } + + if (reversed_a_b && (b * x) <= 1.0 && x <= 0.95) { + t = betainc_helper::incbps(a, b, x); + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + return t; + } + + /* Choose expansion for better convergence. */ + y = x * (a + b - 2.0) - (a - 1.0); + if (y < 0.0) { + w = incbeta_cfe::run(a, b, x, true /* small_branch */); + } else { + w = incbeta_cfe::run(a, b, x, false /* small_branch */) / xc; + } + + /* Multiply w by the factor + a b _ _ _ + x (1-x) | (a+b) / ( a | (a) | (b) ) . */ + + y = a * numext::log(x); + t = b * numext::log(xc); + // TODO: gamma is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog) + { + t = pow(xc, b); + t *= pow(x, a); + t /= a; + t *= w; + t *= gamma(a + b) / (gamma(a) * gamma(b)); + } else { + */ + /* Resort to logarithms. 
*/ + y += t + lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b); + y += numext::log(w / a); + t = numext::exp(y); + + /* } */ + // done: + + if (reversed_a_b) { + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + } + return t; + } +}; + +#endif // EIGEN_HAS_C99_MATH + +} // end namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) + lgamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) + digamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) +zeta(const Scalar& x, const Scalar& q) { + return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar) +polygamma(const Scalar& n, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) + erf(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) + erfc(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar) + ndtri(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) + igamma(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma_der_a, Scalar) + igamma_der_a(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igamma_der_a, Scalar)::run(a, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(gamma_sample_der_alpha, Scalar) + gamma_sample_der_alpha(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(gamma_sample_der_alpha, Scalar)::run(a, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) + igammac(const Scalar& a, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) + betainc(const Scalar& a, const Scalar& b, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); +} + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h new file mode 100644 index 0000000..2bb0179 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
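+
+// The default packet-level wrappers declared in this file (plgamma, pdigamma,
+// pzeta, ppolygamma, perf, perfc, pndtri, pigamma, pigamma_der_a,
+// pgamma_sample_der_alpha, pigammac, pbetainc) simply forward to the scalar
+// numext:: routines defined above in SpecialFunctionsImpl.h; vectorized
+// overloads are supplied per architecture by the headers under arch/.
+//
+// Illustrative scalar usage sketch (assuming the unsupported SpecialFunctions
+// module header is included; the argument values are arbitrary):
+//
+//   double p = Eigen::numext::igamma(2.0, 1.5);        // regularized lower incomplete gamma P(a, x)
+//   double q = Eigen::numext::igammac(2.0, 1.5);       // complement, q == 1 - p up to rounding
+//   double z = Eigen::numext::zeta(3.0, 1.0);          // Hurwitz zeta; zeta(x, 1) is the Riemann zeta(x)
+//   double b = Eigen::numext::betainc(2.0, 3.0, 0.25); // regularized incomplete beta, integrated from 0 to x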
+ +#ifndef EIGEN_SPECIALFUNCTIONS_PACKETMATH_H +#define EIGEN_SPECIALFUNCTIONS_PACKETMATH_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } + +/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } + +/** \internal \returns the zeta function of two arguments (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } + +/** \internal \returns the polygamma function (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { using numext::erf; return erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } + +/** \internal \returns the ndtri(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pndtri(const Packet& a) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_ndtri; return generic_ndtri(a); +} + +/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } + +/** \internal \returns the derivative of the incomplete gamma function + * igamma_der_a(\a a, \a x) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma_der_a(const Packet& a, const Packet& x) { + using numext::igamma_der_a; return igamma_der_a(a, x); +} + +/** \internal \returns compute the derivative of the sample + * of Gamma(alpha, 1) random variable with respect to the parameter a + * gamma_sample_der_alpha(\a alpha, \a sample) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pgamma_sample_der_alpha(const Packet& alpha, const Packet& sample) { + using numext::gamma_sample_der_alpha; return gamma_sample_der_alpha(alpha, sample); +} + +/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } + +/** \internal \returns the complementary incomplete gamma function betainc(\a a, \a b, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); } + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h new file mode 100644 index 0000000..2d76692 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h @@ -0,0 +1,46 @@ +#ifndef EIGEN_AVX_BESSELFUNCTIONS_H +#define 
EIGEN_AVX_BESSELFUNCTIONS_H + +namespace Eigen { +namespace internal { + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i0) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i0) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i0e) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i0e) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i1) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i1) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i1e) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i1e) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_j0) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_j0) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_j1) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_j1) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k0) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k0) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k0e) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k0e) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k1) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k1) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k1e) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k1e) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_y0) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_y0) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_y1) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_y1) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_AVX_BESSELFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h new file mode 100644 index 0000000..35e62a8 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h @@ -0,0 +1,16 @@ +#ifndef EIGEN_AVX_SPECIALFUNCTIONS_H +#define EIGEN_AVX_SPECIALFUNCTIONS_H + +namespace Eigen { +namespace internal { + +F16_PACKET_FUNCTION(Packet8f, Packet8h, perf) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, perf) + +F16_PACKET_FUNCTION(Packet8f, Packet8h, pndtri) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pndtri) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_AVX_SPECIAL_FUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h new file mode 100644 index 0000000..7dd3c3e --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h @@ -0,0 +1,46 @@ +#ifndef EIGEN_AVX512_BESSELFUNCTIONS_H +#define EIGEN_AVX512_BESSELFUNCTIONS_H + +namespace Eigen { +namespace internal { + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0e) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0e) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i1) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i1) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i1e) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i1e) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_j0) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_j0) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_j1) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_j1) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k0) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k0) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k0e) 
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k0e) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k1) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k1) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k1e) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k1e) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y0) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y0) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y1) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y1) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_AVX512_BESSELFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h new file mode 100644 index 0000000..79878f2 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h @@ -0,0 +1,16 @@ +#ifndef EIGEN_AVX512_SPECIALFUNCTIONS_H +#define EIGEN_AVX512_SPECIALFUNCTIONS_H + +namespace Eigen { +namespace internal { + +F16_PACKET_FUNCTION(Packet16f, Packet16h, perf) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, perf) + +F16_PACKET_FUNCTION(Packet16f, Packet16h, pndtri) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pndtri) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_AVX512_SPECIAL_FUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h new file mode 100644 index 0000000..dd3bf4d --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h @@ -0,0 +1,369 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GPU_SPECIALFUNCTIONS_H +#define EIGEN_GPU_SPECIALFUNCTIONS_H + +namespace Eigen { + +namespace internal { + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) 
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU) + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma(const double2& a) +{ + using numext::lgamma; + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pdigamma(const float4& a) +{ + using numext::digamma; + return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pdigamma(const double2& a) +{ + using numext::digamma; + return make_double2(digamma(a.x), digamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pzeta(const float4& x, const float4& q) +{ + using numext::zeta; + return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pzeta(const double2& x, const double2& q) +{ + using numext::zeta; + return make_double2(zeta(x.x, q.x), zeta(x.y, q.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 ppolygamma(const float4& n, const float4& x) +{ + using numext::polygamma; + return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 ppolygamma(const double2& n, const double2& x) +{ + using numext::polygamma; + return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf(const float4& a) +{ + return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf(const double2& a) +{ + using numext::erf; + return make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc(const float4& a) +{ + using numext::erfc; + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc(const double2& a) +{ + using numext::erfc; + return make_double2(erfc(a.x), erfc(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pndtri(const float4& a) +{ + using numext::ndtri; + return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pndtri(const double2& a) +{ + using numext::ndtri; + return make_double2(ndtri(a.x), ndtri(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigamma(const float4& a, const float4& x) +{ + using numext::igamma; + return make_float4( + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigamma(const double2& a, const double2& x) +{ + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma_der_a( + const float4& a, const float4& x) { + using numext::igamma_der_a; + return make_float4(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y), + igamma_der_a(a.z, x.z), igamma_der_a(a.w, x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pigamma_der_a(const double2& a, const double2& x) { + using numext::igamma_der_a; + return make_double2(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y)); +} + +template <> +EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE float4 pgamma_sample_der_alpha( + const float4& alpha, const float4& sample) { + using numext::gamma_sample_der_alpha; + return make_float4( + gamma_sample_der_alpha(alpha.x, sample.x), + gamma_sample_der_alpha(alpha.y, sample.y), + gamma_sample_der_alpha(alpha.z, sample.z), + gamma_sample_der_alpha(alpha.w, sample.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pgamma_sample_der_alpha(const double2& alpha, const double2& sample) { + using numext::gamma_sample_der_alpha; + return make_double2( + gamma_sample_der_alpha(alpha.x, sample.x), + gamma_sample_der_alpha(alpha.y, sample.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pbetainc(const float4& a, const float4& b, const float4& x) +{ + using numext::betainc; + return make_float4( + betainc(a.x, b.x, x.x), + betainc(a.y, b.y, x.y), + betainc(a.z, b.z, x.z), + betainc(a.w, b.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pbetainc(const double2& a, const double2& b, const double2& x) +{ + using numext::betainc; + return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i0e(const float4& x) { + using numext::bessel_i0e; + return make_float4(bessel_i0e(x.x), bessel_i0e(x.y), bessel_i0e(x.z), bessel_i0e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_i0e(const double2& x) { + using numext::bessel_i0e; + return make_double2(bessel_i0e(x.x), bessel_i0e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i0(const float4& x) { + using numext::bessel_i0; + return make_float4(bessel_i0(x.x), bessel_i0(x.y), bessel_i0(x.z), bessel_i0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_i0(const double2& x) { + using numext::bessel_i0; + return make_double2(bessel_i0(x.x), bessel_i0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i1e(const float4& x) { + using numext::bessel_i1e; + return make_float4(bessel_i1e(x.x), bessel_i1e(x.y), bessel_i1e(x.z), bessel_i1e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_i1e(const double2& x) { + using numext::bessel_i1e; + return make_double2(bessel_i1e(x.x), bessel_i1e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i1(const float4& x) { + using numext::bessel_i1; + return make_float4(bessel_i1(x.x), bessel_i1(x.y), bessel_i1(x.z), bessel_i1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_i1(const double2& x) { + using numext::bessel_i1; + return make_double2(bessel_i1(x.x), bessel_i1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k0e(const float4& x) { + using numext::bessel_k0e; + return make_float4(bessel_k0e(x.x), bessel_k0e(x.y), bessel_k0e(x.z), bessel_k0e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_k0e(const double2& x) { + using numext::bessel_k0e; + return make_double2(bessel_k0e(x.x), bessel_k0e(x.y)); +} + +template <> 
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k0(const float4& x) { + using numext::bessel_k0; + return make_float4(bessel_k0(x.x), bessel_k0(x.y), bessel_k0(x.z), bessel_k0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_k0(const double2& x) { + using numext::bessel_k0; + return make_double2(bessel_k0(x.x), bessel_k0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k1e(const float4& x) { + using numext::bessel_k1e; + return make_float4(bessel_k1e(x.x), bessel_k1e(x.y), bessel_k1e(x.z), bessel_k1e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_k1e(const double2& x) { + using numext::bessel_k1e; + return make_double2(bessel_k1e(x.x), bessel_k1e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k1(const float4& x) { + using numext::bessel_k1; + return make_float4(bessel_k1(x.x), bessel_k1(x.y), bessel_k1(x.z), bessel_k1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_k1(const double2& x) { + using numext::bessel_k1; + return make_double2(bessel_k1(x.x), bessel_k1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_j0(const float4& x) { + using numext::bessel_j0; + return make_float4(bessel_j0(x.x), bessel_j0(x.y), bessel_j0(x.z), bessel_j0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_j0(const double2& x) { + using numext::bessel_j0; + return make_double2(bessel_j0(x.x), bessel_j0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_j1(const float4& x) { + using numext::bessel_j1; + return make_float4(bessel_j1(x.x), bessel_j1(x.y), bessel_j1(x.z), bessel_j1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_j1(const double2& x) { + using numext::bessel_j1; + return make_double2(bessel_j1(x.x), bessel_j1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_y0(const float4& x) { + using numext::bessel_y0; + return make_float4(bessel_y0(x.x), bessel_y0(x.y), bessel_y0(x.z), bessel_y0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_y0(const double2& x) { + using numext::bessel_y0; + return make_double2(bessel_y0(x.x), bessel_y0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_y1(const float4& x) { + using numext::bessel_y1; + return make_float4(bessel_y1(x.x), bessel_y1(x.y), bessel_y1(x.z), bessel_y1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pbessel_y1(const double2& x) { + using numext::bessel_y1; + return make_double2(bessel_y1(x.x), bessel_y1(x.y)); +} + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GPU_SPECIALFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h new file mode 100644 index 0000000..67433b0 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h @@ -0,0 +1,54 @@ +#ifndef EIGEN_NEON_BESSELFUNCTIONS_H +#define EIGEN_NEON_BESSELFUNCTIONS_H + +namespace Eigen { +namespace internal { + +#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC + +#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet8hf METHOD(const Packet8hf& x) { \ + const Packet4f lo = METHOD(vcvt_f32_f16(vget_low_f16(x))); \ + const Packet4f hi = 
METHOD(vcvt_f32_f16(vget_high_f16(x))); \ + return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); \ +} \ + \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet4hf METHOD(const Packet4hf& x) { \ + return vcvt_f16_f32(METHOD(vcvt_f32_f16(x))); \ +} + +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i0) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i0e) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i1) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i1e) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_j0) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_j1) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k0) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k0e) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k1) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k1e) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_y0) +NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_y1) + +#undef NEON_HALF_TO_FLOAT_FUNCTIONS +#endif + +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i0) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i0e) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i1) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i1e) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_j0) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_j1) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k0) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k0e) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k1) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k1e) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_y0) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_y1) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_NEON_BESSELFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h b/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h new file mode 100644 index 0000000..ec92951 --- /dev/null +++ b/external/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h @@ -0,0 +1,34 @@ +#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H +#define EIGEN_NEON_SPECIALFUNCTIONS_H + +namespace Eigen { +namespace internal { + +#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC + +#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet8hf METHOD(const Packet8hf& x) { \ + const Packet4f lo = METHOD(vcvt_f32_f16(vget_low_f16(x))); \ + const Packet4f hi = METHOD(vcvt_f32_f16(vget_high_f16(x))); \ + return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); \ +} \ + \ +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +Packet4hf METHOD(const Packet4hf& x) { \ + return vcvt_f16_f32(METHOD(vcvt_f32_f16(x))); \ +} + +NEON_HALF_TO_FLOAT_FUNCTIONS(perf) +NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri) + +#undef NEON_HALF_TO_FLOAT_FUNCTIONS +#endif + +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_NEON_SPECIALFUNCTIONS_H diff --git a/external/unsupported/Eigen/src/Splines/Spline.h b/external/unsupported/Eigen/src/Splines/Spline.h new file mode 100644 index 0000000..79edd52 --- /dev/null +++ b/external/unsupported/Eigen/src/Splines/Spline.h @@ -0,0 +1,507 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20010-2011 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
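+
+// Illustrative usage sketch (a minimal clamped cubic spline in 2-D, using only
+// the interface declared below; knot and control-point values are arbitrary):
+//
+//   typedef Eigen::Spline<double, 2> Spline2d;
+//   Spline2d::KnotVectorType knots(8);
+//   knots << 0, 0, 0, 0, 1, 1, 1, 1;              // clamped: degree = 8 - 4 - 1 = 3
+//   Spline2d::ControlPointVectorType ctrls(2, 4); // one control point per column
+//   ctrls << 0, 1, 2, 3,
+//            0, 1, 0, 1;
+//   Spline2d spline(knots, ctrls);
+//   Spline2d::PointType pt = spline(0.5);         // curve point C(0.5)
+//   auto ders = spline.derivatives(0.5, 1);       // columns hold C(u) and C'(u)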
+ +#ifndef EIGEN_SPLINE_H +#define EIGEN_SPLINE_H + +#include "SplineFwd.h" + +namespace Eigen +{ + /** + * \ingroup Splines_Module + * \class Spline + * \brief A class representing multi-dimensional spline curves. + * + * The class represents B-splines with non-uniform knot vectors. Each control + * point of the B-spline is associated with a basis function + * \f{align*} + * C(u) & = \sum_{i=0}^{n}N_{i,p}(u)P_i + * \f} + * + * \tparam _Scalar The underlying data type (typically float or double) + * \tparam _Dim The curve dimension (e.g. 2 or 3) + * \tparam _Degree Per default set to Dynamic; could be set to the actual desired + * degree for optimization purposes (would result in stack allocation + * of several temporary variables). + **/ + template + class Spline + { + public: + typedef _Scalar Scalar; /*!< The spline curve's scalar type. */ + enum { Dimension = _Dim /*!< The spline curve's dimension. */ }; + enum { Degree = _Degree /*!< The spline curve's degree. */ }; + + /** \brief The point type the spline is representing. */ + typedef typename SplineTraits::PointType PointType; + + /** \brief The data type used to store knot vectors. */ + typedef typename SplineTraits::KnotVectorType KnotVectorType; + + /** \brief The data type used to store parameter vectors. */ + typedef typename SplineTraits::ParameterVectorType ParameterVectorType; + + /** \brief The data type used to store non-zero basis functions. */ + typedef typename SplineTraits::BasisVectorType BasisVectorType; + + /** \brief The data type used to store the values of the basis function derivatives. */ + typedef typename SplineTraits::BasisDerivativeType BasisDerivativeType; + + /** \brief The data type representing the spline's control points. */ + typedef typename SplineTraits::ControlPointVectorType ControlPointVectorType; + + /** + * \brief Creates a (constant) zero spline. + * For Splines with dynamic degree, the resulting degree will be 0. + **/ + Spline() + : m_knots(1, (Degree==Dynamic ? 2 : 2*Degree+2)) + , m_ctrls(ControlPointVectorType::Zero(Dimension,(Degree==Dynamic ? 1 : Degree+1))) + { + // in theory this code can go to the initializer list but it will get pretty + // much unreadable ... + enum { MinDegree = (Degree==Dynamic ? 0 : Degree) }; + m_knots.template segment(0) = Array::Zero(); + m_knots.template segment(MinDegree+1) = Array::Ones(); + } + + /** + * \brief Creates a spline from a knot vector and control points. + * \param knots The spline's knot vector. + * \param ctrls The spline's control point vector. + **/ + template + Spline(const OtherVectorType& knots, const OtherArrayType& ctrls) : m_knots(knots), m_ctrls(ctrls) {} + + /** + * \brief Copy constructor for splines. + * \param spline The input spline. + **/ + template + Spline(const Spline& spline) : + m_knots(spline.knots()), m_ctrls(spline.ctrls()) {} + + /** + * \brief Returns the knots of the underlying spline. + **/ + const KnotVectorType& knots() const { return m_knots; } + + /** + * \brief Returns the ctrls of the underlying spline. + **/ + const ControlPointVectorType& ctrls() const { return m_ctrls; } + + /** + * \brief Returns the spline value at a given site \f$u\f$. + * + * The function returns + * \f{align*} + * C(u) & = \sum_{i=0}^{n}N_{i,p}P_i + * \f} + * + * \param u Parameter \f$u \in [0;1]\f$ at which the spline is evaluated. + * \return The spline value at the given location \f$u\f$. + **/ + PointType operator()(Scalar u) const; + + /** + * \brief Evaluation of spline derivatives of up-to given order. 
+ * + * The function returns + * \f{align*} + * \frac{d^i}{du^i}C(u) & = \sum_{i=0}^{n} \frac{d^i}{du^i} N_{i,p}(u)P_i + * \f} + * for i ranging between 0 and order. + * + * \param u Parameter \f$u \in [0;1]\f$ at which the spline derivative is evaluated. + * \param order The order up to which the derivatives are computed. + **/ + typename SplineTraits::DerivativeType + derivatives(Scalar u, DenseIndex order) const; + + /** + * \copydoc Spline::derivatives + * Using the template version of this function is more efficieent since + * temporary objects are allocated on the stack whenever this is possible. + **/ + template + typename SplineTraits::DerivativeType + derivatives(Scalar u, DenseIndex order = DerivativeOrder) const; + + /** + * \brief Computes the non-zero basis functions at the given site. + * + * Splines have local support and a point from their image is defined + * by exactly \f$p+1\f$ control points \f$P_i\f$ where \f$p\f$ is the + * spline degree. + * + * This function computes the \f$p+1\f$ non-zero basis function values + * for a given parameter value \f$u\f$. It returns + * \f{align*}{ + * N_{i,p}(u), \hdots, N_{i+p+1,p}(u) + * \f} + * + * \param u Parameter \f$u \in [0;1]\f$ at which the non-zero basis functions + * are computed. + **/ + typename SplineTraits::BasisVectorType + basisFunctions(Scalar u) const; + + /** + * \brief Computes the non-zero spline basis function derivatives up to given order. + * + * The function computes + * \f{align*}{ + * \frac{d^i}{du^i} N_{i,p}(u), \hdots, \frac{d^i}{du^i} N_{i+p+1,p}(u) + * \f} + * with i ranging from 0 up to the specified order. + * + * \param u Parameter \f$u \in [0;1]\f$ at which the non-zero basis function + * derivatives are computed. + * \param order The order up to which the basis function derivatives are computes. + **/ + typename SplineTraits::BasisDerivativeType + basisFunctionDerivatives(Scalar u, DenseIndex order) const; + + /** + * \copydoc Spline::basisFunctionDerivatives + * Using the template version of this function is more efficieent since + * temporary objects are allocated on the stack whenever this is possible. + **/ + template + typename SplineTraits::BasisDerivativeType + basisFunctionDerivatives(Scalar u, DenseIndex order = DerivativeOrder) const; + + /** + * \brief Returns the spline degree. + **/ + DenseIndex degree() const; + + /** + * \brief Returns the span within the knot vector in which u is falling. + * \param u The site for which the span is determined. + **/ + DenseIndex span(Scalar u) const; + + /** + * \brief Computes the span within the provided knot vector in which u is falling. + **/ + static DenseIndex Span(typename SplineTraits::Scalar u, DenseIndex degree, const typename SplineTraits::KnotVectorType& knots); + + /** + * \brief Returns the spline's non-zero basis functions. + * + * The function computes and returns + * \f{align*}{ + * N_{i,p}(u), \hdots, N_{i+p+1,p}(u) + * \f} + * + * \param u The site at which the basis functions are computed. + * \param degree The degree of the underlying spline. + * \param knots The underlying spline's knot vector. + **/ + static BasisVectorType BasisFunctions(Scalar u, DenseIndex degree, const KnotVectorType& knots); + + /** + * \copydoc Spline::basisFunctionDerivatives + * \param degree The degree of the underlying spline + * \param knots The underlying spline's knot vector. 
+ **/ + static BasisDerivativeType BasisFunctionDerivatives( + const Scalar u, const DenseIndex order, const DenseIndex degree, const KnotVectorType& knots); + + private: + KnotVectorType m_knots; /*!< Knot vector. */ + ControlPointVectorType m_ctrls; /*!< Control points. */ + + template + static void BasisFunctionDerivativesImpl( + const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, + const DenseIndex order, + const DenseIndex p, + const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& U, + DerivativeType& N_); + }; + + template + DenseIndex Spline<_Scalar, _Dim, _Degree>::Span( + typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::Scalar u, + DenseIndex degree, + const typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::KnotVectorType& knots) + { + // Piegl & Tiller, "The NURBS Book", A2.1 (p. 68) + if (u <= knots(0)) return degree; + const Scalar* pos = std::upper_bound(knots.data()+degree-1, knots.data()+knots.size()-degree-1, u); + return static_cast( std::distance(knots.data(), pos) - 1 ); + } + + template + typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType + Spline<_Scalar, _Dim, _Degree>::BasisFunctions( + typename Spline<_Scalar, _Dim, _Degree>::Scalar u, + DenseIndex degree, + const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) + { + const DenseIndex p = degree; + const DenseIndex i = Spline::Span(u, degree, knots); + + const KnotVectorType& U = knots; + + BasisVectorType left(p+1); left(0) = Scalar(0); + BasisVectorType right(p+1); right(0) = Scalar(0); + + VectorBlock(left,1,p) = u - VectorBlock(U,i+1-p,p).reverse(); + VectorBlock(right,1,p) = VectorBlock(U,i+1,p) - u; + + BasisVectorType N(1,p+1); + N(0) = Scalar(1); + for (DenseIndex j=1; j<=p; ++j) + { + Scalar saved = Scalar(0); + for (DenseIndex r=0; r + DenseIndex Spline<_Scalar, _Dim, _Degree>::degree() const + { + if (_Degree == Dynamic) + return m_knots.size() - m_ctrls.cols() - 1; + else + return _Degree; + } + + template + DenseIndex Spline<_Scalar, _Dim, _Degree>::span(Scalar u) const + { + return Spline::Span(u, degree(), knots()); + } + + template + typename Spline<_Scalar, _Dim, _Degree>::PointType Spline<_Scalar, _Dim, _Degree>::operator()(Scalar u) const + { + enum { Order = SplineTraits::OrderAtCompileTime }; + + const DenseIndex span = this->span(u); + const DenseIndex p = degree(); + const BasisVectorType basis_funcs = basisFunctions(u); + + const Replicate ctrl_weights(basis_funcs); + const Block ctrl_pts(ctrls(),0,span-p,Dimension,p+1); + return (ctrl_weights * ctrl_pts).rowwise().sum(); + } + + /* --------------------------------------------------------------------------------------------- */ + + template + void derivativesImpl(const SplineType& spline, typename SplineType::Scalar u, DenseIndex order, DerivativeType& der) + { + enum { Dimension = SplineTraits::Dimension }; + enum { Order = SplineTraits::OrderAtCompileTime }; + enum { DerivativeOrder = DerivativeType::ColsAtCompileTime }; + + typedef typename SplineTraits::ControlPointVectorType ControlPointVectorType; + typedef typename SplineTraits::BasisDerivativeType BasisDerivativeType; + typedef typename BasisDerivativeType::ConstRowXpr BasisDerivativeRowXpr; + + const DenseIndex p = spline.degree(); + const DenseIndex span = spline.span(u); + + const DenseIndex n = (std::min)(p, order); + + der.resize(Dimension,n+1); + + // Retrieve the basis function derivatives up to the desired order... + const BasisDerivativeType basis_func_ders = spline.template basisFunctionDerivatives(u, n+1); + + // ... 
and perform the linear combinations of the control points. + for (DenseIndex der_order=0; der_order ctrl_weights( basis_func_ders.row(der_order) ); + const Block ctrl_pts(spline.ctrls(),0,span-p,Dimension,p+1); + der.col(der_order) = (ctrl_weights * ctrl_pts).rowwise().sum(); + } + } + + template + typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::DerivativeType + Spline<_Scalar, _Dim, _Degree>::derivatives(Scalar u, DenseIndex order) const + { + typename SplineTraits< Spline >::DerivativeType res; + derivativesImpl(*this, u, order, res); + return res; + } + + template + template + typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::DerivativeType + Spline<_Scalar, _Dim, _Degree>::derivatives(Scalar u, DenseIndex order) const + { + typename SplineTraits< Spline, DerivativeOrder >::DerivativeType res; + derivativesImpl(*this, u, order, res); + return res; + } + + template + typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::BasisVectorType + Spline<_Scalar, _Dim, _Degree>::basisFunctions(Scalar u) const + { + return Spline::BasisFunctions(u, degree(), knots()); + } + + /* --------------------------------------------------------------------------------------------- */ + + + template + template + void Spline<_Scalar, _Dim, _Degree>::BasisFunctionDerivativesImpl( + const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, + const DenseIndex order, + const DenseIndex p, + const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& U, + DerivativeType& N_) + { + typedef Spline<_Scalar, _Dim, _Degree> SplineType; + enum { Order = SplineTraits::OrderAtCompileTime }; + + const DenseIndex span = SplineType::Span(u, p, U); + + const DenseIndex n = (std::min)(p, order); + + N_.resize(n+1, p+1); + + BasisVectorType left = BasisVectorType::Zero(p+1); + BasisVectorType right = BasisVectorType::Zero(p+1); + + Matrix ndu(p+1,p+1); + + Scalar saved, temp; // FIXME These were double instead of Scalar. Was there a reason for that? + + ndu(0,0) = 1.0; + + DenseIndex j; + for (j=1; j<=p; ++j) + { + left[j] = u-U[span+1-j]; + right[j] = U[span+j]-u; + saved = 0.0; + + for (DenseIndex r=0; r(saved+right[r+1] * temp); + saved = left[j-r] * temp; + } + + ndu(j,j) = static_cast(saved); + } + + for (j = p; j>=0; --j) + N_(0,j) = ndu(j,p); + + // Compute the derivatives + DerivativeType a(n+1,p+1); + DenseIndex r=0; + for (; r<=p; ++r) + { + DenseIndex s1,s2; + s1 = 0; s2 = 1; // alternate rows in array a + a(0,0) = 1.0; + + // Compute the k-th derivative + for (DenseIndex k=1; k<=static_cast(n); ++k) + { + Scalar d = 0.0; + DenseIndex rk,pk,j1,j2; + rk = r-k; pk = p-k; + + if (r>=k) + { + a(s2,0) = a(s1,0)/ndu(pk+1,rk); + d = a(s2,0)*ndu(rk,pk); + } + + if (rk>=-1) j1 = 1; + else j1 = -rk; + + if (r-1 <= pk) j2 = k-1; + else j2 = p-r; + + for (j=j1; j<=j2; ++j) + { + a(s2,j) = (a(s1,j)-a(s1,j-1))/ndu(pk+1,rk+j); + d += a(s2,j)*ndu(rk+j,pk); + } + + if (r<=pk) + { + a(s2,k) = -a(s1,k-1)/ndu(pk+1,r); + d += a(s2,k)*ndu(r,pk); + } + + N_(k,r) = static_cast(d); + j = s1; s1 = s2; s2 = j; // Switch rows + } + } + + /* Multiply through by the correct factors */ + /* (Eq. 
[2.9]) */ + r = p; + for (DenseIndex k=1; k<=static_cast(n); ++k) + { + for (j=p; j>=0; --j) N_(k,j) *= r; + r *= p-k; + } + } + + template + typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::BasisDerivativeType + Spline<_Scalar, _Dim, _Degree>::basisFunctionDerivatives(Scalar u, DenseIndex order) const + { + typename SplineTraits >::BasisDerivativeType der; + BasisFunctionDerivativesImpl(u, order, degree(), knots(), der); + return der; + } + + template + template + typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::BasisDerivativeType + Spline<_Scalar, _Dim, _Degree>::basisFunctionDerivatives(Scalar u, DenseIndex order) const + { + typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::BasisDerivativeType der; + BasisFunctionDerivativesImpl(u, order, degree(), knots(), der); + return der; + } + + template + typename SplineTraits >::BasisDerivativeType + Spline<_Scalar, _Dim, _Degree>::BasisFunctionDerivatives( + const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, + const DenseIndex order, + const DenseIndex degree, + const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) + { + typename SplineTraits::BasisDerivativeType der; + BasisFunctionDerivativesImpl(u, order, degree, knots, der); + return der; + } +} + +#endif // EIGEN_SPLINE_H diff --git a/external/unsupported/Eigen/src/Splines/SplineFitting.h b/external/unsupported/Eigen/src/Splines/SplineFitting.h new file mode 100644 index 0000000..9f6e8af --- /dev/null +++ b/external/unsupported/Eigen/src/Splines/SplineFitting.h @@ -0,0 +1,431 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20010-2011 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPLINE_FITTING_H +#define EIGEN_SPLINE_FITTING_H + +#include +#include +#include +#include + +#include "SplineFwd.h" + +#include "../../../../Eigen/LU" +#include "../../../../Eigen/QR" + +namespace Eigen +{ + /** + * \brief Computes knot averages. + * \ingroup Splines_Module + * + * The knots are computed as + * \f{align*} + * u_0 & = \hdots = u_p = 0 \\ + * u_{m-p} & = \hdots = u_{m} = 1 \\ + * u_{j+p} & = \frac{1}{p}\sum_{i=j}^{j+p-1}\bar{u}_i \quad\quad j=1,\hdots,n-p + * \f} + * where \f$p\f$ is the degree and \f$m+1\f$ the number knots + * of the desired interpolating spline. + * + * \param[in] parameters The input parameters. During interpolation one for each data point. + * \param[in] degree The spline degree which is used during the interpolation. + * \param[out] knots The output knot vector. 
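   *
   * A short worked instance of the averaging rule above: for degree \f$p=3\f$ and parameters
   * \f$\bar{u} = (0,\, 0.25,\, 0.5,\, 0.75,\, 1)\f$ the clamped knot vector has
   * \f$5+3+1=9\f$ entries, namely \f$(0,0,0,0,\;0.5,\;1,1,1,1)\f$: four zeros, four ones,
   * and a single interior knot \f$(\bar{u}_1+\bar{u}_2+\bar{u}_3)/3 = 0.5\f$.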
+ * + * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data + **/ + template + void KnotAveraging(const KnotVectorType& parameters, DenseIndex degree, KnotVectorType& knots) + { + knots.resize(parameters.size()+degree+1); + + for (DenseIndex j=1; j + void KnotAveragingWithDerivatives(const ParameterVectorType& parameters, + const unsigned int degree, + const IndexArray& derivativeIndices, + KnotVectorType& knots) + { + typedef typename ParameterVectorType::Scalar Scalar; + + DenseIndex numParameters = parameters.size(); + DenseIndex numDerivatives = derivativeIndices.size(); + + if (numDerivatives < 1) + { + KnotAveraging(parameters, degree, knots); + return; + } + + DenseIndex startIndex; + DenseIndex endIndex; + + DenseIndex numInternalDerivatives = numDerivatives; + + if (derivativeIndices[0] == 0) + { + startIndex = 0; + --numInternalDerivatives; + } + else + { + startIndex = 1; + } + if (derivativeIndices[numDerivatives - 1] == numParameters - 1) + { + endIndex = numParameters - degree; + --numInternalDerivatives; + } + else + { + endIndex = numParameters - degree - 1; + } + + // There are (endIndex - startIndex + 1) knots obtained from the averaging + // and 2 for the first and last parameters. + DenseIndex numAverageKnots = endIndex - startIndex + 3; + KnotVectorType averageKnots(numAverageKnots); + averageKnots[0] = parameters[0]; + + int newKnotIndex = 0; + for (DenseIndex i = startIndex; i <= endIndex; ++i) + averageKnots[++newKnotIndex] = parameters.segment(i, degree).mean(); + averageKnots[++newKnotIndex] = parameters[numParameters - 1]; + + newKnotIndex = -1; + + ParameterVectorType temporaryParameters(numParameters + 1); + KnotVectorType derivativeKnots(numInternalDerivatives); + for (DenseIndex i = 0; i < numAverageKnots - 1; ++i) + { + temporaryParameters[0] = averageKnots[i]; + ParameterVectorType parameterIndices(numParameters); + int temporaryParameterIndex = 1; + for (DenseIndex j = 0; j < numParameters; ++j) + { + Scalar parameter = parameters[j]; + if (parameter >= averageKnots[i] && parameter < averageKnots[i + 1]) + { + parameterIndices[temporaryParameterIndex] = j; + temporaryParameters[temporaryParameterIndex++] = parameter; + } + } + temporaryParameters[temporaryParameterIndex] = averageKnots[i + 1]; + + for (int j = 0; j <= temporaryParameterIndex - 2; ++j) + { + for (DenseIndex k = 0; k < derivativeIndices.size(); ++k) + { + if (parameterIndices[j + 1] == derivativeIndices[k] + && parameterIndices[j + 1] != 0 + && parameterIndices[j + 1] != numParameters - 1) + { + derivativeKnots[++newKnotIndex] = temporaryParameters.segment(j, 3).mean(); + break; + } + } + } + } + + KnotVectorType temporaryKnots(averageKnots.size() + derivativeKnots.size()); + + std::merge(averageKnots.data(), averageKnots.data() + averageKnots.size(), + derivativeKnots.data(), derivativeKnots.data() + derivativeKnots.size(), + temporaryKnots.data()); + + // Number of knots (one for each point and derivative) plus spline order. + DenseIndex numKnots = numParameters + numDerivatives + degree + 1; + knots.resize(numKnots); + + knots.head(degree).fill(temporaryKnots[0]); + knots.tail(degree).fill(temporaryKnots.template tail<1>()[0]); + knots.segment(degree, temporaryKnots.size()) = temporaryKnots; + } + + /** + * \brief Computes chord length parameters which are required for spline interpolation. + * \ingroup Splines_Module + * + * \param[in] pts The data points to which a spline should be fit. 
+ * \param[out] chord_lengths The resulting chord length vector. + * + * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data + **/ + template + void ChordLengths(const PointArrayType& pts, KnotVectorType& chord_lengths) + { + typedef typename KnotVectorType::Scalar Scalar; + + const DenseIndex n = pts.cols(); + + // 1. compute the column-wise norms + chord_lengths.resize(pts.cols()); + chord_lengths[0] = 0; + chord_lengths.rightCols(n-1) = (pts.array().leftCols(n-1) - pts.array().rightCols(n-1)).matrix().colwise().norm(); + + // 2. compute the partial sums + std::partial_sum(chord_lengths.data(), chord_lengths.data()+n, chord_lengths.data()); + + // 3. normalize the data + chord_lengths /= chord_lengths(n-1); + chord_lengths(n-1) = Scalar(1); + } + + /** + * \brief Spline fitting methods. + * \ingroup Splines_Module + **/ + template + struct SplineFitting + { + typedef typename SplineType::KnotVectorType KnotVectorType; + typedef typename SplineType::ParameterVectorType ParameterVectorType; + + /** + * \brief Fits an interpolating Spline to the given data points. + * + * \param pts The points for which an interpolating spline will be computed. + * \param degree The degree of the interpolating spline. + * + * \returns A spline interpolating the initially provided points. + **/ + template + static SplineType Interpolate(const PointArrayType& pts, DenseIndex degree); + + /** + * \brief Fits an interpolating Spline to the given data points. + * + * \param pts The points for which an interpolating spline will be computed. + * \param degree The degree of the interpolating spline. + * \param knot_parameters The knot parameters for the interpolation. + * + * \returns A spline interpolating the initially provided points. + **/ + template + static SplineType Interpolate(const PointArrayType& pts, DenseIndex degree, const KnotVectorType& knot_parameters); + + /** + * \brief Fits an interpolating spline to the given data points and + * derivatives. + * + * \param points The points for which an interpolating spline will be computed. + * \param derivatives The desired derivatives of the interpolating spline at interpolation + * points. + * \param derivativeIndices An array indicating which point each derivative belongs to. This + * must be the same size as @a derivatives. + * \param degree The degree of the interpolating spline. + * + * \returns A spline interpolating @a points with @a derivatives at those points. + * + * \sa Les A. Piegl, Khairan Rajab, Volha Smarodzinana. 2008. + * Curve interpolation with directional constraints for engineering design. + * Engineering with Computers + **/ + template + static SplineType InterpolateWithDerivatives(const PointArrayType& points, + const PointArrayType& derivatives, + const IndexArray& derivativeIndices, + const unsigned int degree); + + /** + * \brief Fits an interpolating spline to the given data points and derivatives. + * + * \param points The points for which an interpolating spline will be computed. + * \param derivatives The desired derivatives of the interpolating spline at interpolation points. + * \param derivativeIndices An array indicating which point each derivative belongs to. This + * must be the same size as @a derivatives. + * \param degree The degree of the interpolating spline. + * \param parameters The parameters corresponding to the interpolation points. + * + * \returns A spline interpolating @a points with @a derivatives at those points. + * + * \sa Les A. 
Piegl, Khairan Rajab, Volha Smarodzinana. 2008. + * Curve interpolation with directional constraints for engineering design. + * Engineering with Computers + */ + template + static SplineType InterpolateWithDerivatives(const PointArrayType& points, + const PointArrayType& derivatives, + const IndexArray& derivativeIndices, + const unsigned int degree, + const ParameterVectorType& parameters); + }; + + template + template + SplineType SplineFitting::Interpolate(const PointArrayType& pts, DenseIndex degree, const KnotVectorType& knot_parameters) + { + typedef typename SplineType::KnotVectorType::Scalar Scalar; + typedef typename SplineType::ControlPointVectorType ControlPointVectorType; + + typedef Matrix MatrixType; + + KnotVectorType knots; + KnotAveraging(knot_parameters, degree, knots); + + DenseIndex n = pts.cols(); + MatrixType A = MatrixType::Zero(n,n); + for (DenseIndex i=1; i qr(A); + + // Here, we are creating a temporary due to an Eigen issue. + ControlPointVectorType ctrls = qr.solve(MatrixType(pts.transpose())).transpose(); + + return SplineType(knots, ctrls); + } + + template + template + SplineType SplineFitting::Interpolate(const PointArrayType& pts, DenseIndex degree) + { + KnotVectorType chord_lengths; // knot parameters + ChordLengths(pts, chord_lengths); + return Interpolate(pts, degree, chord_lengths); + } + + template + template + SplineType + SplineFitting::InterpolateWithDerivatives(const PointArrayType& points, + const PointArrayType& derivatives, + const IndexArray& derivativeIndices, + const unsigned int degree, + const ParameterVectorType& parameters) + { + typedef typename SplineType::KnotVectorType::Scalar Scalar; + typedef typename SplineType::ControlPointVectorType ControlPointVectorType; + + typedef Matrix MatrixType; + + const DenseIndex n = points.cols() + derivatives.cols(); + + KnotVectorType knots; + + KnotAveragingWithDerivatives(parameters, degree, derivativeIndices, knots); + + // fill matrix + MatrixType A = MatrixType::Zero(n, n); + + // Use these dimensions for quicker populating, then transpose for solving. + MatrixType b(points.rows(), n); + + DenseIndex startRow; + DenseIndex derivativeStart; + + // End derivatives. 
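    // When a derivative is prescribed at an end point, the clamped end-derivative
    // identity C'(u_0) = p/(u_{p+1} - u_0) * (P_1 - P_0) (mirrored at the far end)
    // becomes a (-1, 1) row acting on the two outermost control points, with the
    // right-hand side scaled by y = (u_{p+1} - u_0)/p; the two branches below set
    // up exactly these constraint rows.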
+ if (derivativeIndices[0] == 0) + { + A.template block<1, 2>(1, 0) << -1, 1; + + Scalar y = (knots(degree + 1) - knots(0)) / degree; + b.col(1) = y*derivatives.col(0); + + startRow = 2; + derivativeStart = 1; + } + else + { + startRow = 1; + derivativeStart = 0; + } + if (derivativeIndices[derivatives.cols() - 1] == points.cols() - 1) + { + A.template block<1, 2>(n - 2, n - 2) << -1, 1; + + Scalar y = (knots(knots.size() - 1) - knots(knots.size() - (degree + 2))) / degree; + b.col(b.cols() - 2) = y*derivatives.col(derivatives.cols() - 1); + } + + DenseIndex row = startRow; + DenseIndex derivativeIndex = derivativeStart; + for (DenseIndex i = 1; i < parameters.size() - 1; ++i) + { + const DenseIndex span = SplineType::Span(parameters[i], degree, knots); + + if (derivativeIndex < derivativeIndices.size() && derivativeIndices[derivativeIndex] == i) + { + A.block(row, span - degree, 2, degree + 1) + = SplineType::BasisFunctionDerivatives(parameters[i], 1, degree, knots); + + b.col(row++) = points.col(i); + b.col(row++) = derivatives.col(derivativeIndex++); + } + else + { + A.row(row).segment(span - degree, degree + 1) + = SplineType::BasisFunctions(parameters[i], degree, knots); + b.col(row++) = points.col(i); + } + } + b.col(0) = points.col(0); + b.col(b.cols() - 1) = points.col(points.cols() - 1); + A(0,0) = 1; + A(n - 1, n - 1) = 1; + + // Solve + FullPivLU lu(A); + ControlPointVectorType controlPoints = lu.solve(MatrixType(b.transpose())).transpose(); + + SplineType spline(knots, controlPoints); + + return spline; + } + + template + template + SplineType + SplineFitting::InterpolateWithDerivatives(const PointArrayType& points, + const PointArrayType& derivatives, + const IndexArray& derivativeIndices, + const unsigned int degree) + { + ParameterVectorType parameters; + ChordLengths(points, parameters); + return InterpolateWithDerivatives(points, derivatives, derivativeIndices, degree, parameters); + } +} + +#endif // EIGEN_SPLINE_FITTING_H diff --git a/external/unsupported/Eigen/src/Splines/SplineFwd.h b/external/unsupported/Eigen/src/Splines/SplineFwd.h new file mode 100644 index 0000000..00d6b49 --- /dev/null +++ b/external/unsupported/Eigen/src/Splines/SplineFwd.h @@ -0,0 +1,93 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20010-2011 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPLINES_FWD_H +#define EIGEN_SPLINES_FWD_H + +#include "../../../../Eigen/Core" + +namespace Eigen +{ + template class Spline; + + template < typename SplineType, int DerivativeOrder = Dynamic > struct SplineTraits {}; + + /** + * \ingroup Splines_Module + * \brief Compile-time attributes of the Spline class for Dynamic degree. + **/ + template + struct SplineTraits< Spline<_Scalar, _Dim, _Degree>, Dynamic > + { + typedef _Scalar Scalar; /*!< The spline curve's scalar type. */ + enum { Dimension = _Dim /*!< The spline curve's dimension. */ }; + enum { Degree = _Degree /*!< The spline curve's degree. */ }; + + enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ }; + enum { NumOfDerivativesAtCompileTime = OrderAtCompileTime /*!< The number of derivatives defined for the current spline. */ }; + + enum { DerivativeMemoryLayout = Dimension==1 ? 
RowMajor : ColMajor /*!< The derivative type's memory layout. */ }; + + /** \brief The data type used to store non-zero basis functions. */ + typedef Array BasisVectorType; + + /** \brief The data type used to store the values of the basis function derivatives. */ + typedef Array BasisDerivativeType; + + /** \brief The data type used to store the spline's derivative values. */ + typedef Array DerivativeType; + + /** \brief The point type the spline is representing. */ + typedef Array PointType; + + /** \brief The data type used to store knot vectors. */ + typedef Array KnotVectorType; + + /** \brief The data type used to store parameter vectors. */ + typedef Array ParameterVectorType; + + /** \brief The data type representing the spline's control points. */ + typedef Array ControlPointVectorType; + }; + + /** + * \ingroup Splines_Module + * \brief Compile-time attributes of the Spline class for fixed degree. + * + * The traits class inherits all attributes from the SplineTraits of Dynamic degree. + **/ + template < typename _Scalar, int _Dim, int _Degree, int _DerivativeOrder > + struct SplineTraits< Spline<_Scalar, _Dim, _Degree>, _DerivativeOrder > : public SplineTraits< Spline<_Scalar, _Dim, _Degree> > + { + enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ }; + enum { NumOfDerivativesAtCompileTime = _DerivativeOrder==Dynamic ? Dynamic : _DerivativeOrder+1 /*!< The number of derivatives defined for the current spline. */ }; + + enum { DerivativeMemoryLayout = _Dim==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ }; + + /** \brief The data type used to store the values of the basis function derivatives. */ + typedef Array<_Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType; + + /** \brief The data type used to store the spline's derivative values. */ + typedef Array<_Scalar,_Dim,Dynamic,DerivativeMemoryLayout,_Dim,NumOfDerivativesAtCompileTime> DerivativeType; + }; + + /** \brief 2D float B-spline with dynamic degree. */ + typedef Spline Spline2f; + + /** \brief 3D float B-spline with dynamic degree. */ + typedef Spline Spline3f; + + /** \brief 2D double B-spline with dynamic degree. */ + typedef Spline Spline2d; + + /** \brief 3D double B-spline with dynamic degree. */ + typedef Spline Spline3d; +} + +#endif // EIGEN_SPLINES_FWD_H diff --git a/external/unsupported/README.txt b/external/unsupported/README.txt new file mode 100644 index 0000000..70793bf --- /dev/null +++ b/external/unsupported/README.txt @@ -0,0 +1,50 @@ +This directory contains contributions from various users. +They are provided "as is", without any support. Nevertheless, +most of them are subject to be included in Eigen in the future. + +In order to use an unsupported module you have to do either: + + - add the path_to_eigen/unsupported directory to your include path and do: + #include + + - or directly do: + #include + + +If you are interested in contributing to one of them, or have other stuff +you would like to share, feel free to contact us: +http://eigen.tuxfamily.org/index.php?title=Main_Page#Mailing_list + +Any kind of contributions are much appreciated, even very preliminary ones. +However, it: + - must rely on Eigen, + - must be highly related to math, + - should have some general purpose in the sense that it could + potentially become an official Eigen module (or be merged into another one). + +In doubt feel free to contact us. 
For instance, if your addons is very too specific +but it shows an interesting way of using Eigen, then it could be a nice demo. + + +This directory is organized as follow: + +unsupported/Eigen/ModuleHeader1 +unsupported/Eigen/ModuleHeader2 +unsupported/Eigen/... +unsupported/Eigen/src/Module1/SourceFile1.h +unsupported/Eigen/src/Module1/SourceFile2.h +unsupported/Eigen/src/Module1/... +unsupported/Eigen/src/Module2/SourceFile1.h +unsupported/Eigen/src/Module2/SourceFile2.h +unsupported/Eigen/src/Module2/... +unsupported/Eigen/src/... +unsupported/doc/snippets/.cpp <- code snippets for the doc +unsupported/doc/examples/.cpp <- examples for the doc +unsupported/doc/TutorialModule1.dox +unsupported/doc/TutorialModule2.dox +unsupported/doc/... +unsupported/test/.cpp <- unit test files + +The documentation is generated at the same time than the main Eigen documentation. +The .html files are generated in: build_dir/doc/html/unsupported/ + diff --git a/include/common.hpp b/include/common.hpp index e2bf6dc..57406f9 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -21,6 +21,7 @@ #include #include #include +#include using Float = double; using Matrix = Eigen::Matrix; diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index da75067..75012e6 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -95,6 +95,8 @@ namespace matrix_adaptation virtual bool perform_eigendecomposition(const parameters::Settings& settings); + virtual void adapt_ps(const parameters::Weights& w); + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -241,6 +243,52 @@ namespace matrix_adaptation Vector invert_y(const Vector&) override; }; + struct CovarainceNoEigvAdaptation final : CovarianceAdaptation + { + using CovarianceAdaptation::CovarianceAdaptation; + + void adapt_ps(const parameters::Weights& w) override; + + bool perform_eigendecomposition(const parameters::Settings& settings) override; + + Vector invert_y(const Vector&) override; + }; + + + struct NaturalGradientAdaptation final : Adaptation + { + Matrix A; + Matrix G; + + NaturalGradientAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) + : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), + A(Matrix::Identity(dim, dim)), + G(Matrix::Zero(dim, dim)), + I(Matrix::Identity(dim, dim)) + {} + + void adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + size_t mu, size_t lambda + ) override; + + bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + const parameters::Settings& settings, parameters::Stats& stats) override; + + void restart(const parameters::Settings& settings) override; + + Vector compute_y(const Vector&) override; + + Vector invert_y(const Vector&) override; + + private: + const Matrix I; + }; + + + inline std::shared_ptr get(const parameters::Modules& m, const size_t dim, const Vector& x0, const Float expected_z) { @@ -257,9 +305,12 @@ namespace matrix_adaptation return std::make_shared(dim, x0, expected_z); case MatrixAdaptationType::CHOLESKY: return std::make_shared(dim, x0, expected_z); - case MatrixAdaptationType::CMSA: return std::make_shared(dim, x0, expected_z); + case MatrixAdaptationType::COVARIANCE_NO_EIGV: + return std::make_shared(dim, x0, expected_z); + case MatrixAdaptationType::NATURAL_GRADIENT: + return 
std::make_shared(dim, x0, expected_z); default: case MatrixAdaptationType::COVARIANCE: return std::make_shared(dim, x0, expected_z); diff --git a/include/modules.hpp b/include/modules.hpp index 57dd3af..2efa09f 100644 --- a/include/modules.hpp +++ b/include/modules.hpp @@ -76,7 +76,9 @@ namespace parameters SEPERABLE, ONEPLUSONE, CHOLESKY, - CMSA + CMSA, + COVARIANCE_NO_EIGV, + NATURAL_GRADIENT }; enum class CenterPlacement diff --git a/include/to_string.hpp b/include/to_string.hpp index e45667f..e4a059f 100644 --- a/include/to_string.hpp +++ b/include/to_string.hpp @@ -128,6 +128,10 @@ namespace parameters return "COVARIANCE"; case MatrixAdaptationType::CMSA: return "CMSA"; + case MatrixAdaptationType::COVARIANCE_NO_EIGV: + return "COVARIANCE_NO_EIGV"; + case MatrixAdaptationType::NATURAL_GRADIENT: + return "NATURAL_GRADIENT"; default: return "unkown"; } diff --git a/src/main.cpp b/src/main.cpp index b834837..f1eda07 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,12 +9,11 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 40; +static int dim = 5; static bool rotated = true; static size_t budget = dim * 10000; - struct Ellipse { size_t evals; @@ -110,12 +109,17 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::ELLIPSE; - auto ssa = parameters::StepSizeAdaptation::LPXNES; + + + auto ssa = parameters::StepSizeAdaptation::CSA; //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); - //run_modcma(parameters::MatrixAdaptationType::CMSA, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); + /*run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::MXNES); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::LPXNES); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa);*/ } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 90fa717..3468de2 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -5,20 +5,6 @@ namespace matrix_adaptation using namespace parameters; - static Matrix cholesky_decomposition(const Matrix& C) - { - const Eigen::LLT chol(C); - if(chol.info() != Eigen::Success) - { - std::cout << chol.info(); - - - //raise std::exception(chol.info()); - assert(false); - } - return chol.matrixL(); - } - Vector Adaptation::invert_x(const Vector& xi, const Float sigma) { return (xi - m) / sigma; @@ -33,14 +19,16 @@ namespace matrix_adaptation adapt_evolution_paths_inner(pop, w, stats, mu, lambda); } + void CovarianceAdaptation::adapt_ps(const Weights& w) + { + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * inv_root_C * dm); + } + void CovarianceAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, const Stats& stats, const size_t mu, const size_t lambda) { - const auto& expr = constants::calc_eigv ? 
inv_root_C * dm : dz; - - ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * expr); - + adapt_ps(w); const Float actual_ps_length = ps.norm() / sqrt( 1.0 - pow(1.0 - w.cs, 2.0 * (stats.evaluations / lambda))); @@ -64,23 +52,6 @@ namespace matrix_adaptation bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { - if(!constants::calc_eigv) - { - const Eigen::LLT chol(C); - if(chol.info() != Eigen::Success) - { - if(settings.verbose) - { - std::cout << "Cholesky solver failed, we need to restart reason:" - << chol.info() << '\n'; - } - return false; - } - - A = chol.matrixL(); - return true; - } - const Eigen::SelfAdjointEigenSolver eigen_solver(C); if(eigen_solver.info() != Eigen::Success) { @@ -143,11 +114,6 @@ namespace matrix_adaptation Vector CovarianceAdaptation::invert_y(const Vector& yi) { - if(!constants::calc_eigv) - { - return A.triangularView().solve(yi); - } - return (B.transpose() * yi).cwiseQuotient(d); } @@ -396,4 +362,83 @@ namespace matrix_adaptation return A.triangularView().solve(yi); } + + void CovarainceNoEigvAdaptation::adapt_ps(const Weights& w) + { + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); + } + + bool CovarainceNoEigvAdaptation::perform_eigendecomposition(const parameters::Settings& settings) + { + const Eigen::LLT chol(C); + if(chol.info() != Eigen::Success) + { + if(settings.verbose) + { + std::cout << "Cholesky solver failed, we need to restart reason:" + << chol.info() << '\n'; + } + return false; + } + + A = chol.matrixL(); + return true; + } + + Vector CovarainceNoEigvAdaptation::invert_y(const Vector& yi) + { + return A.triangularView().solve(yi); + } + + void NaturalGradientAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) + { + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); + } + + bool NaturalGradientAdaptation::adapt_matrix( + const parameters::Weights& w, const parameters::Modules& m, + const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) + { + + stats.last_update = stats.t; + stats.n_updates++; + static Float eta = 0.6 * (3 + std::log(settings.dim)) / std::pow(settings.dim, 1.5); + + G.setZero(); + const Matrix I = Matrix::Identity(settings.dim, settings.dim); + for(int i = 0; i < mu; ++i) + { + const auto& z = pop.Z.col(i); + G.noalias() += w.positive(i) * (z * z.transpose() - I); + } + + // Remove isotropic (sigma-related) component: make G trace-free + G -= (G.trace() / dd) * I; + + // Ensure symmetry for numerical stability + G = 0.5 * (G + G.transpose().eval()); + + // Apply the exponential update to A + A *= ((0.5 * eta) * G).exp(); + + return true; + } + + void NaturalGradientAdaptation::restart(const parameters::Settings& settings) + { + Adaptation::restart(settings); + A = Matrix::Identity(settings.dim, settings.dim); + G = Matrix::Zero(settings.dim, settings.dim); + } + + Vector NaturalGradientAdaptation::compute_y(const Vector& zi) + { + return A * zi; + } + + Vector NaturalGradientAdaptation::invert_y(const Vector& yi) + { + return A.triangularView().solve(yi); + } + } diff --git a/src/mutation.cpp b/src/mutation.cpp index 24e1f99..07676de 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -20,8 +20,6 @@ namespace mutation return (f < fopt) and (i >= seq_cutoff) and (m != parameters::Mirror::PAIRWISE or i % 2 == 0); } - - void Strategy::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) { 
ss->sample(sigma, p.pop, p.weights.beta); @@ -143,10 +141,7 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - // const Float z = ((std::dynamic_pointer_cast(adaptation)->inv_root_C * .Y).colwise().norm().array().pow(2.) - adaptation->dd).matrix() * w.clipped(); - - - const Float z = ((pop.Z).colwise().norm().array().pow(2.) - adaptation->dd).matrix() * w.clipped(); + const Float z = ((pop.Z).colwise().squaredNorm().array() - adaptation->dd).matrix() * w.clipped(); sigma *= std::exp((w.cs / std::sqrt(adaptation->dd)) * z); } @@ -154,23 +149,16 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - const auto n = std::min(pop.n_finite(), old_pop.n_finite()); - if (n != 0) - { - // const auto z = (w.mueff * std::pow((dynamic.inv_root_C * dynamic.dm).norm(), 2)) - dynamic.dd; - const auto mu = pop.n - lambda; - const auto dz = (pop.Z.leftCols(mu).array().rowwise() * w.positive.array().transpose()).rowwise().sum().matrix(); - const auto z = (w.mueff * std::pow(dz.norm(), 2)) - adaptation->dd; - sigma *= std::exp((w.cs / adaptation->dd) * z); - } + const Float delta = (w.mueff * adaptation->dz.squaredNorm() - adaptation->dd); + sigma *= std::exp((w.cs / adaptation->dd) * delta); } void LPXNES::adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { - const auto z = std::exp(w.cs * pop.s.array().log().matrix().dot(w.clipped())); - sigma = std::pow(sigma, 1.0 - w.cs) * z; + const Float rel_log = (pop.s.array() / sigma).log().matrix().dot(w.clipped()); + sigma *= std::exp(w.cs * rel_log); } void SR::adapt(const parameters::Weights& w, std::shared_ptr adaptation, diff --git a/src/weights.cpp b/src/weights.cpp index 8d81c6a..844ed57 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -8,20 +8,21 @@ namespace parameters { switch (ssa) { - case StepSizeAdaptation::XNES: - return mueff / (2.0 * std::log(std::max(Float{ 2. }, d)) * sqrt(d)); - case StepSizeAdaptation::MXNES: - return 1.0; - case StepSizeAdaptation::LPXNES: - return (9.0 * mueff) / (10.0 * sqrt(d)); - case StepSizeAdaptation::PSR: - return 0.9; - case StepSizeAdaptation::SR: - return 1.0 / 12.0; - case StepSizeAdaptation::CSA: - return (mueff + 2.0) / (d + mueff + 5.0); - default: - return 0.3; + case StepSizeAdaptation::XNES: + //return 1.0 / std::sqrt(d); + return mueff / (2.0 * std::log(std::max(Float{ 2. 
}, d)) * sqrt(d)); + case StepSizeAdaptation::MXNES: + return 1.0; + case StepSizeAdaptation::LPXNES: + return (9.0 * mueff) / (10.0 * sqrt(d)); + case StepSizeAdaptation::PSR: + return 0.9; + case StepSizeAdaptation::SR: + return 1.0 / 12.0; + case StepSizeAdaptation::CSA: + return (mueff + 2.0) / (d + mueff + 5.0); + default: + return 0.3; } } @@ -29,39 +30,39 @@ namespace parameters { switch (ssa) { - case StepSizeAdaptation::SR: - return 1.0 + (d / 2.0); - case StepSizeAdaptation::CSA: - { - const Float rhs = std::sqrt((mueff - Float(1.0)) / (d + 1)) - 1; - return 1.0 + (2.0 * std::max(Float(0.0), rhs) + cs); - } - default: - return 0.0; + case StepSizeAdaptation::SR: + return 1.0 + (d / 2.0); + case StepSizeAdaptation::CSA: + { + const Float rhs = std::sqrt((mueff - Float(1.0)) / (d + 1)) - 1; + return 1.0 + (2.0 * std::max(Float(0.0), rhs) + cs); + } + default: + return 0.0; } } - + Weights::Weights( - const size_t dim, - const size_t mu, - const size_t lambda, + const size_t dim, + const size_t mu, + const size_t lambda, const Settings& settings, const Float expected_length_z -) + ) : weights(lambda), positive(mu), negative(lambda - mu), expected_length_z(expected_length_z) { const Float d = static_cast(dim); switch (settings.modules.weights) { - case RecombinationWeights::EQUAL: - weights_equal(mu); - break; - case RecombinationWeights::HALF_POWER_LAMBDA: - weights_half_power_lambda(mu, lambda); - break; - case RecombinationWeights::DEFAULT: - weights_default(lambda); - break; + case RecombinationWeights::EQUAL: + weights_equal(mu); + break; + case RecombinationWeights::HALF_POWER_LAMBDA: + weights_half_power_lambda(mu, lambda); + break; + case RecombinationWeights::DEFAULT: + weights_default(lambda); + break; } mueff = std::pow(positive.sum(), 2) / positive.dot(positive); @@ -104,9 +105,9 @@ namespace parameters expected_length_ps = (1.4 + (2.0 / (d + 1.0))) * expected_length_z; - beta = 1.0 / std::sqrt(2.0 * mueff) ? 
settings.modules.ssa != StepSizeAdaptation::LPXNES : - std::log(2.0) / (std::sqrt(d) * std::log(d)) - ; + beta = 1.0 / std::sqrt(2.0 * mueff); + if (settings.modules.ssa == StepSizeAdaptation::LPXNES) + beta = std::log(2.0) / (std::sqrt(d) * std::log(d)); } From 7a37a303184e0ad86ec62d4a9b149d82b904f8d1 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Mon, 2 Jun 2025 19:35:06 +0200 Subject: [PATCH 19/74] made weights also work for mu > 0.5 lambda --- include/settings.hpp | 171 +++++++++++++++++++++----------------- include/weights.hpp | 3 +- src/interface.cpp | 1 - src/main.cpp | 10 +-- src/matrix_adaptation.cpp | 40 +++++---- src/weights.cpp | 12 +-- 6 files changed, 127 insertions(+), 110 deletions(-) diff --git a/include/settings.hpp b/include/settings.hpp index 50050a1..97c9aa8 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -5,91 +5,106 @@ namespace parameters { - struct Settings - { - size_t dim; - Modules modules; + struct Settings + { + size_t dim; + Modules modules; - std::optional target; - std::optional max_generations; - size_t budget; + std::optional target; + std::optional max_generations; + size_t budget; - Float sigma0; - size_t lambda0; - size_t mu0; + Float sigma0; + size_t lambda0; + size_t mu0; - std::optional x0; - Vector lb; - Vector ub; - std::optional cs; - std::optional cc; - std::optional cmu; - std::optional c1; - bool verbose; - Float volume; + std::optional x0; + Vector lb; + Vector ub; + std::optional cs; + std::optional cc; + std::optional cmu; + std::optional c1; + bool verbose; + Float volume; - Settings(size_t dim, - std::optional mod = std::nullopt, - std::optional target = std::nullopt, - std::optional max_generations = std::nullopt, - std::optional budget = std::nullopt, - std::optional sigma = std::nullopt, - std::optional lambda = std::nullopt, - std::optional mu = std::nullopt, - std::optional x0 = std::nullopt, - std::optional lb = std::nullopt, - std::optional ub = std::nullopt, - std::optional cs = std::nullopt, - std::optional cc = std::nullopt, - std::optional cmu = std::nullopt, - std::optional c1 = std::nullopt, - bool verbose = false) : dim(dim), - modules(mod.value_or(Modules())), - target(target), - max_generations(max_generations), - budget(budget.value_or(dim * 1e4)), - sigma0(sigma.value_or(2.0)), - lambda0(lambda.value_or(4 + std::floor(3 * std::log(dim)))), - mu0(mu.value_or(lambda0 / 2)), - x0(x0), - lb(lb.value_or(Vector::Ones(dim) * -std::numeric_limits::infinity())), - ub(ub.value_or(Vector::Ones(dim) * std::numeric_limits::infinity())), - cs(cs), - cc(cc), - cmu(cmu), - c1(c1), - verbose(verbose), - volume(0.0) - { - if (modules.mirrored == Mirror::PAIRWISE and lambda0 % 2 != 0) - lambda0++; + Settings(size_t dim, + std::optional mod = std::nullopt, + std::optional target = std::nullopt, + std::optional max_generations = std::nullopt, + std::optional budget = std::nullopt, + std::optional sigma = std::nullopt, + std::optional lambda = std::nullopt, + std::optional mu = std::nullopt, + std::optional x0 = std::nullopt, + std::optional lb = std::nullopt, + std::optional ub = std::nullopt, + std::optional cs = std::nullopt, + std::optional cc = std::nullopt, + std::optional cmu = std::nullopt, + std::optional c1 = std::nullopt, + bool verbose = false, + bool always_compute_eigv = false + ) : dim(dim), + modules(mod.value_or(Modules())), + target(target), + max_generations(max_generations), + budget(budget.value_or(dim * 1e4)), + sigma0(sigma.value_or(2.0)), + lambda0(lambda.value_or(4 + std::floor(3 * 
std::log(dim)))), + mu0(mu.value_or(lambda0 / 2)), + x0(x0), + lb(lb.value_or(Vector::Ones(dim) * -std::numeric_limits::infinity())), + ub(ub.value_or(Vector::Ones(dim)* std::numeric_limits::infinity())), + cs(cs), + cc(cc), + cmu(cmu), + c1(c1), + verbose(verbose), + volume(0.0) + { + if (modules.mirrored == Mirror::PAIRWISE and lambda0 % 2 != 0) + lambda0++; - if (mu0 > lambda0) - { - mu0 = lambda0 / 2; - } + if (mu0 > lambda0) + { + mu0 = lambda0 / 2; + } - if(modules.ssa == StepSizeAdaptation::SA) - { - mu0 = std::min(lambda0 / 4, mu0); - } + if (modules.ssa == StepSizeAdaptation::SA) + { + mu0 = std::min(lambda0 / 4, mu0); + } - if (lambda0 == 1) - { - mu0 = 1; - modules.elitist = true; - modules.active = false; - modules.weights = RecombinationWeights::EQUAL; - modules.ssa = StepSizeAdaptation::SR; - modules.matrix_adaptation = MatrixAdaptationType::ONEPLUSONE; - cc = 2.0 / (static_cast(dim) + 2.0); - c1 = 2.0 / (pow(static_cast(dim),2) + 6.0); + /*if (modules.matrix_adaptation == MatrixAdaptationType::NATURAL_GRADIENT) + { + mu0 = lambda0; + }*/ - if (modules.restart_strategy == RestartStrategyType::BIPOP || modules.restart_strategy == RestartStrategyType::IPOP) - modules.restart_strategy = RestartStrategyType::RESTART; - } - volume = (this->ub.cwiseMin(10 * sigma0) - this->lb.cwiseMax(-10 * sigma0)).prod(); - } - }; + if (modules.ssa == StepSizeAdaptation::CSA + and modules.matrix_adaptation == MatrixAdaptationType::COVARIANCE + and not always_compute_eigv + ) + { + modules.matrix_adaptation = MatrixAdaptationType::COVARIANCE_NO_EIGV; + } + + if (lambda0 == 1) + { + mu0 = 1; + modules.elitist = true; + modules.active = false; + modules.weights = RecombinationWeights::EQUAL; + modules.ssa = StepSizeAdaptation::SR; + modules.matrix_adaptation = MatrixAdaptationType::ONEPLUSONE; + cc = 2.0 / (static_cast(dim) + 2.0); + c1 = 2.0 / (pow(static_cast(dim), 2) + 6.0); + + if (modules.restart_strategy == RestartStrategyType::BIPOP || modules.restart_strategy == RestartStrategyType::IPOP) + modules.restart_strategy = RestartStrategyType::RESTART; + } + volume = (this->ub.cwiseMin(10 * sigma0) - this->lb.cwiseMax(-10 * sigma0)).prod(); + } + }; } \ No newline at end of file diff --git a/include/weights.hpp b/include/weights.hpp index 5003123..6df238b 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -16,14 +16,13 @@ namespace parameters Float damps; Float sqrt_cc_mueff, sqrt_cs_mueff; Float lazy_update_interval; - Float sigma_path_scale; Float expected_length_z; Float expected_length_ps; Float beta; Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings, const Float expected_length_z); - void weights_default(const size_t lambda); + void weights_default(const size_t mu, const size_t lambda); void weights_equal(const size_t mu); diff --git a/src/interface.cpp b/src/interface.cpp index b228d43..3146852 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -558,7 +558,6 @@ void define_parameters(py::module& main) .def_readwrite("sqrt_cc_mueff", &Weights::sqrt_cc_mueff) .def_readwrite("sqrt_cs_mueff", &Weights::sqrt_cs_mueff) .def_readwrite("lazy_update_interval", &Weights::lazy_update_interval) - .def_readwrite("sigma_path_scale", &Weights::sigma_path_scale) .def_readwrite("expected_length_z", &Weights::expected_length_z) .def_readwrite("expected_length_ps", &Weights::expected_length_ps) .def_readwrite("weights", &Weights::weights) diff --git a/src/main.cpp b/src/main.cpp index f1eda07..14809c3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 
+9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 5; +static int dim = 2; static bool rotated = true; static size_t budget = dim * 10000; @@ -118,8 +118,8 @@ int main() //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); - /*run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::MXNES); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::LPXNES); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa);*/ + //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::MXNES); + //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::LPXNES); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 3468de2..2a4563d 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -53,9 +53,9 @@ namespace matrix_adaptation bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { const Eigen::SelfAdjointEigenSolver eigen_solver(C); - if(eigen_solver.info() != Eigen::Success) + if (eigen_solver.info() != Eigen::Success) { - if(settings.verbose) + if (settings.verbose) { std::cout << "Eigenvalue solver failed, we need to restart reason:" << eigen_solver.info() << '\n'; @@ -65,9 +65,9 @@ namespace matrix_adaptation d = eigen_solver.eigenvalues(); B = eigen_solver.eigenvectors(); - if(d.minCoeff() < 0.0) + if (d.minCoeff() < 0.0) { - if(settings.verbose) + if (settings.verbose) { std::cout << "Negative eigenvalues after decomposition, we need to restart.\n"; } @@ -85,7 +85,7 @@ namespace matrix_adaptation const Settings& settings, parameters::Stats& stats) { - if(static_cast(stats.t) >= static_cast(stats.last_update) + w.lazy_update_interval) + if (static_cast(stats.t) >= static_cast(stats.last_update) + w.lazy_update_interval) { stats.last_update = stats.t; stats.n_updates++; @@ -147,7 +147,7 @@ namespace matrix_adaptation const auto& popY = m.active ? 
pop.Y : pop.Y.leftCols(mu); const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * weights.sum())); - for(auto j = 0; j < settings.dim; j++) + for (auto j = 0; j < settings.dim; j++) { const auto rank_mu = (popY.row(j).array().pow(2) * weights.transpose().array()).sum(); c(j) = (decay_c * c(j)) + (w.c1 * pow(pc(j), 2)) + (w.cmu * rank_mu); @@ -181,18 +181,18 @@ namespace matrix_adaptation const parameters::Stats& stats, size_t mu, size_t lambda) { - if(!stats.has_improved) + if (!stats.has_improved) return; pc = (1.0 - w.cc) * pc; - if(stats.success_ratio < max_success_ratio) + if (stats.success_ratio < max_success_ratio) pc += w.sqrt_cc_mueff * pop.Y.col(0); } bool OnePlusOneAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) { - if(!stats.has_improved) + if (!stats.has_improved) { return true; } @@ -286,11 +286,11 @@ namespace matrix_adaptation A *= std::sqrt(1 - w.c1 - w.cmu); Eigen::internal::llt_rank_update_lower(A, pc, w.c1); - for(auto i = 0; i < mu; i++) + for (auto i = 0; i < mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(i), w.cmu * w.positive(i)); - if(m.active) - for(auto i = 0; i < pop.Y.cols() - mu; i++) + if (m.active) + for (auto i = 0; i < pop.Y.cols() - mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(mu + i), w.cmu * w.negative(i)); @@ -335,9 +335,9 @@ namespace matrix_adaptation C = 0.5 * (C + C.transpose().eval()); const Eigen::LLT chol(C); - if(chol.info() != Eigen::Success) + if (chol.info() != Eigen::Success) { - if(settings.verbose) + if (settings.verbose) std::cout << "t: " << stats.t << "Cholesky solver failed, we need to restart reason:" << chol.info() << '\n'; return false; @@ -371,9 +371,9 @@ namespace matrix_adaptation bool CovarainceNoEigvAdaptation::perform_eigendecomposition(const parameters::Settings& settings) { const Eigen::LLT chol(C); - if(chol.info() != Eigen::Success) + if (chol.info() != Eigen::Success) { - if(settings.verbose) + if (settings.verbose) { std::cout << "Cholesky solver failed, we need to restart reason:" << chol.info() << '\n'; @@ -405,13 +405,15 @@ namespace matrix_adaptation static Float eta = 0.6 * (3 + std::log(settings.dim)) / std::pow(settings.dim, 1.5); G.setZero(); - const Matrix I = Matrix::Identity(settings.dim, settings.dim); - for(int i = 0; i < mu; ++i) + for (int i = 0; i < w.positive.rows(); ++i) { const auto& z = pop.Z.col(i); G.noalias() += w.positive(i) * (z * z.transpose() - I); } + + //std::cout << A << std::endl << std::endl; + // Remove isotropic (sigma-related) component: make G trace-free G -= (G.trace() / dd) * I; @@ -421,6 +423,8 @@ namespace matrix_adaptation // Apply the exponential update to A A *= ((0.5 * eta) * G).exp(); + //std::cout << A << std::endl << std::endl << std::endl; + return true; } diff --git a/src/weights.cpp b/src/weights.cpp index 844ed57..0a7516b 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -9,8 +9,8 @@ namespace parameters switch (ssa) { case StepSizeAdaptation::XNES: - //return 1.0 / std::sqrt(d); - return mueff / (2.0 * std::log(std::max(Float{ 2. }, d)) * sqrt(d)); + //return mueff / (2.0 * std::log(std::max(Float{ 2. 
}, d)) * sqrt(d)); + return 0.01; case StepSizeAdaptation::MXNES: return 1.0; case StepSizeAdaptation::LPXNES: @@ -61,7 +61,7 @@ namespace parameters weights_half_power_lambda(mu, lambda); break; case RecombinationWeights::DEFAULT: - weights_default(lambda); + weights_default(mu, lambda); break; } @@ -108,13 +108,13 @@ namespace parameters beta = 1.0 / std::sqrt(2.0 * mueff); if (settings.modules.ssa == StepSizeAdaptation::LPXNES) beta = std::log(2.0) / (std::sqrt(d) * std::log(d)); - } - void Weights::weights_default(const size_t lambda) + void Weights::weights_default(const size_t mu, const size_t lambda) { - const Float base = std::log((static_cast(lambda) + 1.) / 2.0); + const Float ratio = static_cast(lambda) / static_cast(mu); + const Float base = std::log((static_cast(lambda) + 1.) / ratio); for (auto i = 0; i < positive.size(); ++i) positive(i) = base - std::log(static_cast(i + 1)); From 475d510fe5420fea9162b576445d9194674d0733 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Mon, 2 Jun 2025 19:46:40 +0200 Subject: [PATCH 20/74] dont rely on weird back transformations --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 14809c3..801054e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 2; +static int dim = 20; static bool rotated = true; static size_t budget = dim * 10000; From fc7e7bb39cfbde97e22f8071683bedf4aa390c53 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Mon, 2 Jun 2025 19:46:55 +0200 Subject: [PATCH 21/74] fix inversion for natural grad --- src/main.cpp | 8 ++++---- src/matrix_adaptation.cpp | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 801054e..8b38930 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -63,7 +63,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = false; + m.elitist = true; m.active = false; m.ssa = ssa; //m.weights = parameters::RecombinationWeights::EQUAL; @@ -116,10 +116,10 @@ int main() //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft); + //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::MXNES); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::LPXNES); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 2a4563d..28f3564 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -392,7 +392,7 @@ namespace matrix_adaptation void NaturalGradientAdaptation::adapt_evolution_paths_inner(const Population& pop, const 
parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) { - ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); + ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); } bool NaturalGradientAdaptation::adapt_matrix( @@ -410,21 +410,16 @@ namespace matrix_adaptation const auto& z = pop.Z.col(i); G.noalias() += w.positive(i) * (z * z.transpose() - I); } - - - //std::cout << A << std::endl << std::endl; // Remove isotropic (sigma-related) component: make G trace-free G -= (G.trace() / dd) * I; // Ensure symmetry for numerical stability - G = 0.5 * (G + G.transpose().eval()); + G = 0.5 * (G + G.transpose().eval()); // Apply the exponential update to A A *= ((0.5 * eta) * G).exp(); - //std::cout << A << std::endl << std::endl << std::endl; - return true; } @@ -442,7 +437,8 @@ namespace matrix_adaptation Vector NaturalGradientAdaptation::invert_y(const Vector& yi) { - return A.triangularView().solve(yi); + //return A.triangularView().solve(yi); + return A.fullPivLu().solve(yi); } } From f65e93a0de1a6bb76c73e4e7ffe7cae9761964d5 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Tue, 3 Jun 2025 11:47:40 +0200 Subject: [PATCH 22/74] update --- CMakeLists.txt | 3 +++ include/common.hpp | 2 +- src/main.cpp | 27 ++++++++++++--------------- src/matrix_adaptation.cpp | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8aaf37b..7a40971 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,5 +19,8 @@ target_include_directories(main PUBLIC if (MSVC) target_compile_options(main PRIVATE /bigobj) + +else() + target_compile_options(main PRIVATE -march=native) endif() \ No newline at end of file diff --git a/include/common.hpp b/include/common.hpp index 57406f9..edf752f 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -23,7 +23,7 @@ #include #include -using Float = double; +using Float = long double; using Matrix = Eigen::Matrix; using Vector = Eigen::Matrix; using Array = Eigen::Array; diff --git a/src/main.cpp b/src/main.cpp index 8b38930..53272f9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ using std::chrono::duration; using std::chrono::milliseconds; static int dim = 20; -static bool rotated = true; +static bool rotated = false; static size_t budget = dim * 10000; @@ -53,7 +53,8 @@ struct Timer { const auto t2 = high_resolution_clock::now(); const auto ms_int = duration_cast(t2 - t1); - std::cout << "Time elapsed: " << static_cast(ms_int.count()) / 1000.0 << "s\n\n"; + std::cout << "Time elapsed: " << std::defaultfloat << std::setprecision(5) << + static_cast(ms_int.count()) / 1000.0 << "s\n\n"; } }; @@ -63,7 +64,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc rng::set_seed(42); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = true; + m.elitist = false; m.active = false; m.ssa = ssa; //m.weights = parameters::RecombinationWeights::EQUAL; @@ -83,8 +84,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc FunctionType f = Ellipse(dim, rotated, fun_t); while (cma.step(f)) { - if (cma.p->stats.global_best.y < 1e-9) - break; + //if (cma.p->stats.global_best.y < 1e-9) + //break; } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; @@ -109,17 +110,13 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::ELLIPSE; - - auto ssa = parameters::StepSizeAdaptation::CSA; - 
//run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft); - //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::XNES); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, parameters::StepSizeAdaptation::LPXNES); + run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 28f3564..b91cedd 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -151,7 +151,7 @@ namespace matrix_adaptation { const auto rank_mu = (popY.row(j).array().pow(2) * weights.transpose().array()).sum(); c(j) = (decay_c * c(j)) + (w.c1 * pow(pc(j), 2)) + (w.cmu * rank_mu); - c(j) = std::max(c(j), 1e-12); + c(j) = std::max(c(j), Float{ 1e-12 }); d(j) = std::sqrt(c(j)); } From e52b78891e4b0051a69d3413632a4df3a408ed18 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Tue, 3 Jun 2025 14:10:45 +0200 Subject: [PATCH 23/74] change back to fast MA-ES --- include/matrix_adaptation.hpp | 11 ++++++++--- src/main.cpp | 12 ++++++------ src/matrix_adaptation.cpp | 37 ++++++++++++++++++++++++++++------- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 75012e6..385ee17 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -163,9 +163,10 @@ namespace matrix_adaptation MatrixAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), M(Matrix::Identity(dim, dim)), M_inv(Matrix::Identity(dim, dim)), - ZwI(Matrix::Identity(dim, dim)), + /*ZwI(Matrix::Identity(dim, dim)), ssI(Matrix::Identity(dim, dim)), - I(Matrix::Identity(dim, dim)) + I(Matrix::Identity(dim, dim)), */ + outdated_M_inv(false) { } @@ -183,9 +184,13 @@ namespace matrix_adaptation Vector invert_y(const Vector&) override; private: - Matrix ZwI, ssI, I; + //Matrix ZwI, ssI, I; + bool outdated_M_inv; }; + + + struct CholeskyAdaptation final : Adaptation { Matrix A; diff --git a/src/main.cpp b/src/main.cpp index 53272f9..5d9daba 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 20; +static int dim = 50; static bool rotated = false; static size_t budget = dim * 10000; @@ -112,11 +112,11 @@ int main() auto ft = functions::ELLIPSE; auto ssa = parameters::StepSizeAdaptation::CSA; - run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); + 
//run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index b91cedd..c893553 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -218,15 +218,33 @@ namespace matrix_adaptation stats.last_update = stats.t; stats.n_updates++; - const auto& weights = m.active ? w.weights.topRows(pop.Y.cols()) : w.positive; + const auto& weights = m.active ? w.weights.topRows(pop.Z.cols()) : w.positive; const auto& popZ = m.active ? pop.Z : pop.Z.leftCols(mu); - const auto& Z = popZ * weights.asDiagonal() * popZ.transpose(); - - ZwI.noalias() = (w.cmu / 2.0) * (Z - I); - ssI.noalias() = (w.c1 / 2.0) * (ps * ps.transpose() - I); + const auto& popY = m.active ? pop.Y : pop.Y.leftCols(mu); - M = M * (I + ssI + ZwI); - M_inv = (I - ssI - ZwI + epsilon * I) * M_inv; + // Normal MA-ES -> O(n^3) + // + // const auto& Z = popZ * weights.asDiagonal() * popZ.transpose(); + // ZwI.noalias() = (w.cmu / 2.0) * (Z - I); + // ssI.noalias() = (w.c1 / 2.0) * (ps * ps.transpose() - I); + // M = M * (I + ssI + ZwI); + // M_inv = (I - ssI - ZwI + epsilon * I) * M_inv; + + // Fast MA-ES -> O(n^2) + const Float tau_1 = w.c1 / 2.0; + const Float tau_m = w.cmu / 2.0; + const Float decay_m = (1.0 - tau_1 - tau_m); + + M = (decay_m * M) + + (tau_1 * (M * ps) * ps.transpose()) + + (popY * (tau_m * weights).asDiagonal() * popZ.transpose()); + + if (settings.modules.elitist) + M_inv = (decay_m * M_inv) + + (tau_1 * ps * (ps.transpose() * M_inv)) + + ((popY * (tau_m * weights).asDiagonal()) * (popZ.transpose() * M_inv)); + else + outdated_M_inv = true; // Rely on moore penrose pseudo-inv (only when needed) return true; } @@ -235,6 +253,7 @@ namespace matrix_adaptation Adaptation::restart(settings); M = Matrix::Identity(settings.dim, settings.dim); M_inv = Matrix::Identity(settings.dim, settings.dim); + outdated_M_inv = false; } Vector MatrixAdaptation::compute_y(const Vector& zi) @@ -244,6 +263,10 @@ namespace matrix_adaptation Vector MatrixAdaptation::invert_y(const Vector& yi) { + if (outdated_M_inv) { + M_inv = M.completeOrthogonalDecomposition().pseudoInverse(); + outdated_M_inv = false; + } return M_inv * yi; } From 75c6277159d3974e9fe62736e2d79aea896c833c Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Tue, 3 Jun 2025 14:17:01 +0200 Subject: [PATCH 24/74] natural es inverse --- include/matrix_adaptation.hpp | 6 ++++-- src/main.cpp | 2 +- src/matrix_adaptation.cpp | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 385ee17..ad13e56 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -264,12 +264,14 @@ namespace matrix_adaptation { Matrix A; Matrix G; + Matrix A_inv; NaturalGradientAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), 
A(Matrix::Identity(dim, dim)), G(Matrix::Zero(dim, dim)), - I(Matrix::Identity(dim, dim)) + A_inv(Matrix::Identity(dim, dim)), + outdated_A_inv(false) {} void adapt_evolution_paths_inner( @@ -289,7 +291,7 @@ namespace matrix_adaptation Vector invert_y(const Vector&) override; private: - const Matrix I; + bool outdated_A_inv; }; diff --git a/src/main.cpp b/src/main.cpp index 5d9daba..0668f22 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -115,7 +115,7 @@ int main() //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index c893553..145b32e 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -443,6 +443,8 @@ namespace matrix_adaptation // Apply the exponential update to A A *= ((0.5 * eta) * G).exp(); + outdated_A_inv = true; + return true; } @@ -451,6 +453,7 @@ namespace matrix_adaptation Adaptation::restart(settings); A = Matrix::Identity(settings.dim, settings.dim); G = Matrix::Zero(settings.dim, settings.dim); + outdated_A_inv = false; } Vector NaturalGradientAdaptation::compute_y(const Vector& zi) @@ -460,8 +463,9 @@ namespace matrix_adaptation Vector NaturalGradientAdaptation::invert_y(const Vector& yi) { - //return A.triangularView().solve(yi); - return A.fullPivLu().solve(yi); + if (outdated_A_inv) + A_inv = A.inverse(); + return A_inv * yi; } } From 4a796bfa19685e57e93a18f68789c714ed2cae78 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Tue, 3 Jun 2025 15:12:03 +0200 Subject: [PATCH 25/74] asda --- include/common.hpp | 2 +- include/sampling.hpp | 74 +++++++++++++++++++++------------------ src/common.cpp | 2 +- src/main.cpp | 11 +++--- src/matrix_adaptation.cpp | 4 +-- 5 files changed, 50 insertions(+), 43 deletions(-) diff --git a/include/common.hpp b/include/common.hpp index edf752f..5d0bd13 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -40,7 +40,7 @@ namespace constants extern size_t cache_min_samples; extern bool cache_samples; extern bool clip_sigma; - extern bool calc_eigv; + extern bool use_box_muller; } /** diff --git a/include/sampling.hpp b/include/sampling.hpp index 7823011..d7028f5 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -219,7 +219,7 @@ namespace sampling /** * Should be overwritten, transforms U(0,1) indep samples into something else */ - [[nodiscard]] virtual Vector transform(Vector x) = 0; + [[nodiscard]] virtual Vector transform(const Vector& x) = 0; [[nodiscard]] virtual Vector operator()() override { @@ -243,7 +243,7 @@ namespace sampling { IdentityTransformer(const std::shared_ptr sampler) : SampleTransformer(sampler) {} - [[nodiscard]] virtual Vector transform(Vector x) + [[nodiscard]] virtual Vector transform(const Vector& x) { return x; } @@ -261,11 +261,28 @@ namespace sampling return sqrt(dd) * (1.0 - 1.0 / (4.0 * dd) + 1.0 / (21.0 * pow(dd, 2.0))); } - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] inline Vector box_muller(const Vector& u) { - for (auto &xi : x) - xi = ppf(xi); - return x; + int n = u.size(); + int 
m = n / 2; + + Vector z(n); + for (size_t i = 0; i < m; ++i) { + const Float r = std::sqrt(-2.0 * std::log(u(2 * i))); + const Float theta = 2.0 * M_PI * u(2 * i + 1); + + z(2 * i) = r * std::cos(theta); + z(2 * i + 1) = r * std::sin(theta); + } + return z.head(n % 2 == 0 ? n : n - 1); + } + + + [[nodiscard]] virtual Vector transform(const Vector& x) override + { + if (constants::use_box_muller) + return box_muller(x); + return x.unaryExpr(&ppf); } }; @@ -275,11 +292,9 @@ namespace sampling UniformScaler(const std::shared_ptr sampler) : SampleTransformer(sampler) {} - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] virtual Vector transform(const Vector& x) override { - for (auto &xi : x) - xi = -scale + (2.0 * scale) * xi; - return x; + return (-scale + (2.0 * scale) * x.array()).matrix(); } }; @@ -289,17 +304,11 @@ namespace sampling LaplaceTransformer(const std::shared_ptr sampler) : SampleTransformer(sampler) {} - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] virtual Vector transform(const Vector& x) override { - for (auto &xi : x) - { - if (xi < 0.5) - xi = b * std::log(2.0 * xi); - else - xi = -b * std::log(2.0 * (1.0 - xi)); - - } - return x; + return ((x.array() < 0.5) + .select(b * (2.0 * x.array()).log(), + -b * (2.0 * (1.0 - x.array())).log())).matrix(); } }; @@ -309,11 +318,9 @@ namespace sampling LogisticTransformer(const std::shared_ptr sampler) : SampleTransformer(sampler) {} - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] virtual Vector transform(const Vector& x) override { - for (auto &xi : x) - xi = s * std::log(xi / (1 - xi)); - return x; + return (s * (x.array() / (1.0 - x.array())).log()).matrix(); } }; @@ -334,11 +341,9 @@ namespace sampling return median_z; } - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] virtual Vector transform(const Vector& x) override { - for (auto &xi : x) - xi = gamma * std::tan(M_PI * (xi - 0.5)); - return x; + return (gamma * (M_PI * (x.array() - 0.5)).tan()).matrix(); } }; @@ -346,14 +351,13 @@ namespace sampling { DoubleWeibullTransformer(const std::shared_ptr sampler) : SampleTransformer(sampler) {} - [[nodiscard]] virtual Vector transform(Vector x) override + [[nodiscard]] virtual Vector transform(const Vector& x) override { - for (auto &xi : x) - if (xi < 0.5) - xi = -std::sqrt(-std::log(2.0 * xi)); - else - xi = std::sqrt(-std::log(2.0 * (1.0 - xi))); - return x; + return ((x.array() < 0.5) + .select( + -(-((2.0 * x.array()).log())).sqrt(), + (-((2.0 * (1.0 - x.array())).log())).sqrt() + )).matrix(); } }; diff --git a/src/common.cpp b/src/common.cpp index d520620..d325d0d 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -15,7 +15,7 @@ namespace constants size_t cache_min_samples = 128; bool cache_samples = false; bool clip_sigma = false; - bool calc_eigv = true; + bool use_box_muller = true; } namespace utils diff --git a/src/main.cpp b/src/main.cpp index 0668f22..2f52a54 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 50; +static int dim = 100; static bool rotated = false; static size_t budget = dim * 10000; @@ -114,9 +114,12 @@ int main() //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); + 
//run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); + + + + //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 145b32e..121968d 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -431,11 +431,11 @@ namespace matrix_adaptation for (int i = 0; i < w.positive.rows(); ++i) { const auto& z = pop.Z.col(i); - G.noalias() += w.positive(i) * (z * z.transpose() - I); + G.noalias() += w.positive(i) * (z * z.transpose() - Matrix::Identity(settings.dim, settings.dim)); } // Remove isotropic (sigma-related) component: make G trace-free - G -= (G.trace() / dd) * I; + G.diagonal().array() -= (G.trace() / dd); // Ensure symmetry for numerical stability G = 0.5 * (G + G.transpose().eval()); From a27b9292c21e1b3051761397705a5369413556f6 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Tue, 3 Jun 2025 15:23:20 +0200 Subject: [PATCH 26/74] cleanup --- include/matrix_adaptation.hpp | 28 +++++++++++----------------- include/settings.hpp | 8 ++++---- src/matrix_adaptation.cpp | 1 + 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index ad13e56..d85e8a1 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -60,9 +60,9 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner( - const Population& pop, + const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -75,8 +75,8 @@ namespace matrix_adaptation { Vector pc, d; Matrix B, C; - Matrix A; - Matrix inv_root_C; + Matrix A; + Matrix inv_root_C; bool hs = true; @@ -96,7 +96,7 @@ namespace matrix_adaptation virtual bool perform_eigendecomposition(const parameters::Settings& settings); virtual void adapt_ps(const parameters::Weights& w); - + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) override; @@ -115,7 +115,7 @@ namespace matrix_adaptation { Vector pc, d, c; bool hs; - + SeperableAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), pc(Vector::Zero(dim)), d(Vector::Ones(dim)), @@ -139,7 +139,7 @@ namespace matrix_adaptation }; - struct OnePlusOneAdaptation: CovarianceAdaptation + struct OnePlusOneAdaptation : CovarianceAdaptation { constexpr static Float max_success_ratio = 0.44; @@ -163,15 +163,12 @@ namespace matrix_adaptation MatrixAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), M(Matrix::Identity(dim, dim)), M_inv(Matrix::Identity(dim, dim)), - /*ZwI(Matrix::Identity(dim, dim)), - ssI(Matrix::Identity(dim, dim)), - I(Matrix::Identity(dim, dim)), */ outdated_M_inv(false) { } void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, + const parameters::Stats& stats, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const 
parameters::Modules& m, const Population& pop, size_t mu, @@ -184,11 +181,8 @@ namespace matrix_adaptation Vector invert_y(const Vector&) override; private: - //Matrix ZwI, ssI, I; bool outdated_M_inv; - }; - - + }; struct CholeskyAdaptation final : Adaptation @@ -196,7 +190,7 @@ namespace matrix_adaptation Matrix A; Vector pc; - CholeskyAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) + CholeskyAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), A(Matrix::Identity(dim, dim)), pc(Vector::Zero(dim)) @@ -204,7 +198,7 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner( - const Population& pop, + const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda diff --git a/include/settings.hpp b/include/settings.hpp index 97c9aa8..b93104a 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -76,12 +76,12 @@ namespace parameters mu0 = std::min(lambda0 / 4, mu0); } - /*if (modules.matrix_adaptation == MatrixAdaptationType::NATURAL_GRADIENT) + if (modules.matrix_adaptation == MatrixAdaptationType::NATURAL_GRADIENT) { - mu0 = lambda0; - }*/ + modules.active = false; + } - if (modules.ssa == StepSizeAdaptation::CSA + if (modules.ssa != StepSizeAdaptation::CSA and modules.matrix_adaptation == MatrixAdaptationType::COVARIANCE and not always_compute_eigv ) diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 121968d..86d6e38 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -53,6 +53,7 @@ namespace matrix_adaptation bool CovarianceAdaptation::perform_eigendecomposition(const Settings& settings) { const Eigen::SelfAdjointEigenSolver eigen_solver(C); + if (eigen_solver.info() != Eigen::Success) { if (settings.verbose) From 1ac0467ce52ff3302280c320ebd7dba407e17dda Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Tue, 3 Jun 2025 16:12:35 +0200 Subject: [PATCH 27/74] working python binding --- include/matrix_adaptation.hpp | 4 +- include/mutation.hpp | 3 +- include/population.hpp | 4 +- include/sampling.hpp | 4 +- include/weights.hpp | 6 +- modcma/c_maes/cmaescpp/__init__.pyi | 1 + modcma/c_maes/cmaescpp/matrix_adaptation.pyi | 22 +++ modcma/c_maes/cmaescpp/mutation.pyi | 3 +- modcma/c_maes/cmaescpp/options.pyi | 17 +- modcma/c_maes/cmaescpp/parameters.pyi | 28 ++- src/interface.cpp | 171 ++++++++++++------- src/matrix_adaptation.cpp | 14 +- src/repelling.cpp | 2 +- 13 files changed, 183 insertions(+), 96 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index d85e8a1..07b6466 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -27,7 +27,8 @@ namespace matrix_adaptation virtual void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) = 0; - virtual bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, + virtual bool adapt_matrix( + const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) = 0; virtual Vector compute_y(const Vector&) = 0; @@ -79,7 +80,6 @@ namespace matrix_adaptation Matrix inv_root_C; bool hs = true; - CovarianceAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) : Adaptation(dim, x0, Vector::Zero(dim), expected_length_z), 
pc(Vector::Zero(dim)), d(Vector::Ones(dim)), diff --git a/include/mutation.hpp b/include/mutation.hpp index fbb6241..49fea69 100644 --- a/include/mutation.hpp +++ b/include/mutation.hpp @@ -101,12 +101,11 @@ namespace mutation virtual void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p); virtual void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, - const Population& old_pop, const parameters::Stats& stats, const size_t lambda) = 0; + const Population& old_pop, const parameters::Stats& stats, const size_t lambda) {}; }; struct CSA : Strategy { - using Strategy::Strategy; void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, diff --git a/include/population.hpp b/include/population.hpp index 4863ad4..f6f068a 100644 --- a/include/population.hpp +++ b/include/population.hpp @@ -15,10 +15,10 @@ struct Population size_t n; Population(const size_t d, const size_t n) - : X(d, n), Z(d, n), Y(d, n), f(Vector::Constant(n, std::numeric_limits::infinity())), s(n), d(d), n(n), t(n) {} + : X(d, n), Z(d, n), Y(d, n), f(Vector::Constant(n, std::numeric_limits::infinity())), s(n), t(n), d(d), n(n) {} Population(const Matrix &X, const Matrix &Z, const Matrix &Y, const Vector &f, const Vector &s) - : X(X), Z(Z), Y(Y), f(f), s(s), d(X.rows()), n(X.cols()) {} + : X(X), Z(Z), Y(Y), f(f), s(s), t(f.rows()), d(X.rows()), n(X.cols()) {} Population() : Population(0, 0) {} diff --git a/include/sampling.hpp b/include/sampling.hpp index d7028f5..88bbd3f 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -263,8 +263,8 @@ namespace sampling [[nodiscard]] inline Vector box_muller(const Vector& u) { - int n = u.size(); - int m = n / 2; + size_t n = u.size(); + size_t m = n / 2; Vector z(n); for (size_t i = 0; i < m; ++i) { diff --git a/include/weights.hpp b/include/weights.hpp index 6df238b..153c4b1 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -1,11 +1,10 @@ #pragma once - #include "settings.hpp" namespace parameters { - struct Weights + struct Weights { Vector weights; Vector positive; @@ -20,7 +19,8 @@ namespace parameters Float expected_length_ps; Float beta; - Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings, const Float expected_length_z); + Weights(const size_t dim, const size_t mu, const size_t lambda, const Settings &settings, + const Float expected_length_z); void weights_default(const size_t mu, const size_t lambda); diff --git a/modcma/c_maes/cmaescpp/__init__.pyi b/modcma/c_maes/cmaescpp/__init__.pyi index c8ae86c..ceeeb3d 100644 --- a/modcma/c_maes/cmaescpp/__init__.pyi +++ b/modcma/c_maes/cmaescpp/__init__.pyi @@ -34,6 +34,7 @@ class Population: f: numpy.ndarray n: int s: numpy.ndarray + t: numpy.ndarray @overload def __init__(self, dimension: int, n: int) -> None: ... @overload diff --git a/modcma/c_maes/cmaescpp/matrix_adaptation.pyi b/modcma/c_maes/cmaescpp/matrix_adaptation.pyi index 40afdb4..4dd871f 100644 --- a/modcma/c_maes/cmaescpp/matrix_adaptation.pyi +++ b/modcma/c_maes/cmaescpp/matrix_adaptation.pyi @@ -40,4 +40,26 @@ class OnePlusOneAdaptation(CovarianceAdaptation): def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... class SeperableAdaptation(CovarianceAdaptation): + c: numpy.ndarray + pc: numpy.ndarray + d: numpy.ndarray + def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... 
+ +class CovarainceNoEigvAdaptation(CovarainceNoEigvAdaptation): + def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... + +class CholeskyAdaptation(Adaptation): + A: numpy.ndarray + pc: numpy.ndarray + def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... + +class SelfAdaptation(Adaptation): + A: numpy.ndarray + C: numpy.ndarray def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... + +class NaturalGradientAdaptation(Adaptation): + A: numpy.ndarray + G: numpy.ndarray + A_inv: numpy.ndarray + def __init__(self, dimension: int, x0: numpy.ndarray, expected_length_z: float) -> None: ... \ No newline at end of file diff --git a/modcma/c_maes/cmaescpp/mutation.pyi b/modcma/c_maes/cmaescpp/mutation.pyi index 454745c..72be370 100644 --- a/modcma/c_maes/cmaescpp/mutation.pyi +++ b/modcma/c_maes/cmaescpp/mutation.pyi @@ -97,11 +97,10 @@ class SigmaSampler: beta: float def __init__(self, dimension: float) -> None: ... def sample( - self, sigma: float, population: modcma.c_maes.cmaescpp.Population + self, sigma: float, population: modcma.c_maes.cmaescpp.Population, beta: float ) -> None: ... class Strategy: - cs: float s: float sequential_selection: SequentialSelection sigma: float diff --git a/modcma/c_maes/cmaescpp/options.pyi b/modcma/c_maes/cmaescpp/options.pyi index 1420703..f99a409 100644 --- a/modcma/c_maes/cmaescpp/options.pyi +++ b/modcma/c_maes/cmaescpp/options.pyi @@ -112,10 +112,16 @@ class CorrectionMethod: class MatrixAdaptationType: __members__: ClassVar[dict] = ... # read-only + NONE: ClassVar[MatrixAdaptationType] = ... COVARIANCE: ClassVar[MatrixAdaptationType] = ... MATRIX: ClassVar[MatrixAdaptationType] = ... - NONE: ClassVar[MatrixAdaptationType] = ... SEPERABLE: ClassVar[MatrixAdaptationType] = ... + ONEPLUSONE: ClassVar[MatrixAdaptationType] = ... + CHOLESKY: ClassVar[MatrixAdaptationType] = ... + CMSA: ClassVar[MatrixAdaptationType] = ... + COVARIANCE_NO_EIGV: ClassVar[MatrixAdaptationType] = ... + NATURAL_GRADIENT: ClassVar[MatrixAdaptationType] = ... + __entries: ClassVar[dict] = ... def __init__(self, value: int) -> None: ... def __eq__(self, other: object) -> bool: ... @@ -184,12 +190,15 @@ class RestartStrategy: class StepSizeAdaptation: __members__: ClassVar[dict] = ... # read-only CSA: ClassVar[StepSizeAdaptation] = ... - LPXNES: ClassVar[StepSizeAdaptation] = ... + TPA: ClassVar[StepSizeAdaptation] = ... MSR: ClassVar[StepSizeAdaptation] = ... + XNES: ClassVar[StepSizeAdaptation] = ... MXNES: ClassVar[StepSizeAdaptation] = ... + LPXNES: ClassVar[StepSizeAdaptation] = ... PSR: ClassVar[StepSizeAdaptation] = ... - TPA: ClassVar[StepSizeAdaptation] = ... - XNES: ClassVar[StepSizeAdaptation] = ... + SR: ClassVar[StepSizeAdaptation] = ... + SA: ClassVar[StepSizeAdaptation] = ... + __entries: ClassVar[dict] = ... def __init__(self, value: int) -> None: ... def __eq__(self, other: object) -> bool: ... diff --git a/modcma/c_maes/cmaescpp/parameters.pyi b/modcma/c_maes/cmaescpp/parameters.pyi index a538a9f..4197318 100644 --- a/modcma/c_maes/cmaescpp/parameters.pyi +++ b/modcma/c_maes/cmaescpp/parameters.pyi @@ -56,6 +56,7 @@ class Settings: cmu: float | None = ..., c1: float | None = ..., verbose: bool = ..., + always_compute_eigv: bool | False = ... ) -> None: ... class Solution: @@ -66,23 +67,34 @@ class Solution: def __init__(self) -> None: ... 
class Stats: - centers: list[Solution] - current_avg: float - current_best: Solution + t: int evaluations: int + current_avg: float + solutions: list[Solution] + centers: list[Solution] global_best: Solution + current_best: Solution has_improved: bool - solutions: list[Solution] success_ratio: float - t: int + cs: float + last_update: int + n_updates: int def __init__(self) -> None: ... class Weights: - c1: float - cc: float - cmu: float mueff: float mueff_neg: float + c1: float + cmu: float + cc: float + cs: float + damps: float + sqrt_cc_mueff: float + sqrt_cs_mueff: float + lazy_update_interval: float + expected_length_z: float + expected_length_ps: float + beta: float negative: numpy.ndarray positive: numpy.ndarray weights: numpy.ndarray diff --git a/src/interface.cpp b/src/interface.cpp index 3146852..1eca541 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -59,7 +59,8 @@ void define_options(py::module& main) .value("MXNES", StepSizeAdaptation::MXNES) .value("LPXNES", StepSizeAdaptation::LPXNES) .value("PSR", StepSizeAdaptation::PSR) - .value("SR", StepSizeAdaptation::PSR) + .value("SR", StepSizeAdaptation::SR) + .value("SA", StepSizeAdaptation::SA) .export_values(); py::enum_(m, "CorrectionMethod") @@ -86,7 +87,10 @@ void define_options(py::module& main) .value("MATRIX", MatrixAdaptationType::MATRIX) .value("SEPERABLE", MatrixAdaptationType::SEPERABLE) .value("ONEPLUSONE", MatrixAdaptationType::ONEPLUSONE) - .value("CHOLESKSY", MatrixAdaptationType::CHOLESKSY) + .value("CHOLESKY", MatrixAdaptationType::CHOLESKY) + .value("CMSA", MatrixAdaptationType::CMSA) + .value("COVARIANCE_NO_EIGV", MatrixAdaptationType::COVARIANCE_NO_EIGV) + .value("NATURAL_GRADIENT", MatrixAdaptationType::NATURAL_GRADIENT) .export_values(); py::enum_(m, "CenterPlacement") @@ -329,14 +333,19 @@ void define_matrix_adaptation(py::module& main) .def_readwrite("m", &Adaptation::m) .def_readwrite("m_old", &Adaptation::m_old) .def_readwrite("dm", &Adaptation::dm) - .def_readwrite("dz", &Adaptation::dz) .def_readwrite("ps", &Adaptation::ps) + .def_readwrite("dz", &Adaptation::dz) .def_readwrite("dd", &Adaptation::dd) .def_readwrite("expected_length_z", &Adaptation::expected_length_z) .def("adapt_evolution_paths", &Adaptation::adapt_evolution_paths, py::arg("pop"), py::arg("weights"), - py::arg("mutation"), + py::arg("stats"), + py::arg("mu"), + py::arg("lamb")) + .def("adapt_evolution_paths_innner", &Adaptation::adapt_evolution_paths_inner, + py::arg("pop"), + py::arg("weights"), py::arg("stats"), py::arg("mu"), py::arg("lamb")) @@ -365,6 +374,24 @@ void define_matrix_adaptation(py::module& main) ss << ">"; return ss.str(); }); + + py::class_>(m, "NoAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def("__repr__", [](None& dyn) + { + std::stringstream ss; + ss << std::boolalpha; + ss << ""; + return ss.str(); }); + + py::class_>(m, "CovarianceAdaptation") .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) .def_readwrite("pc", &CovarianceAdaptation::pc) @@ -380,6 +407,7 @@ void define_matrix_adaptation(py::module& main) py::arg("population"), py::arg("mu")) .def("perform_eigendecomposition", &CovarianceAdaptation::perform_eigendecomposition, py::arg("stats")) + .def("adapt_ps", &CovarianceAdaptation::adapt_ps, py::arg("weights")) .def("__repr__", [](CovarianceAdaptation& dyn) { std::stringstream ss; @@ -400,8 +428,11 @@ void define_matrix_adaptation(py::module& main) ss << ">"; return ss.str(); }); - py::class_>(m, 
"SeperableAdaptation") + py::class_>(m, "SeperableAdaptation") .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def_readwrite("c", &SeperableAdaptation::c) + .def_readwrite("pc", &SeperableAdaptation::pc) + .def_readwrite("d", &SeperableAdaptation::d) .def("__repr__", [](SeperableAdaptation& dyn) { std::stringstream ss; @@ -413,18 +444,14 @@ void define_matrix_adaptation(py::module& main) ss << " pc: " << dyn.pc.transpose(); ss << " ps: " << dyn.ps.transpose(); ss << " d: " << dyn.d.transpose(); - ss << " B: " << dyn.B; - ss << " C: " << dyn.C; - ss << " inv_root_C: " << dyn.inv_root_C; - ss << " dd: " << dyn.dd; + ss << " c: " << dyn.c.transpose(); ss << " expected_length_z: " << dyn.expected_length_z; ss << " hs: " << dyn.hs; ss << ">"; return ss.str(); }); py::class_>(m, "OnePlusOneAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](SeperableAdaptation& dyn) + .def("__repr__", [](OnePlusOneAdaptation& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -463,21 +490,25 @@ void define_matrix_adaptation(py::module& main) ss << ">"; return ss.str(); }); - py::class_>(m, "NoAdaptation") + py::class_>(m, "CholeskyAdaptation") .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](None& dyn) - { - std::stringstream ss; - ss << std::boolalpha; - ss << ""; - return ss.str(); }); + .def_readwrite("A", &CholeskyAdaptation::A) + .def_readwrite("pc", &CholeskyAdaptation::pc); + + py::class_>(m, "SelfAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def_readwrite("A", &SelfAdaptation::A) + .def_readwrite("C", &SelfAdaptation::C); + + py::class_>(m, "CovarainceNoEigvAdaptation") + ; + + py::class_>(m, "NaturalGradientAdaptation") + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def_readwrite("A", &NaturalGradientAdaptation::A) + .def_readwrite("A_inv", &NaturalGradientAdaptation::A_inv) + .def_readwrite("G", &NaturalGradientAdaptation::G); + } void define_parameters(py::module& main) @@ -560,6 +591,7 @@ void define_parameters(py::module& main) .def_readwrite("lazy_update_interval", &Weights::lazy_update_interval) .def_readwrite("expected_length_z", &Weights::expected_length_z) .def_readwrite("expected_length_ps", &Weights::expected_length_ps) + .def_readwrite("beta", &Weights::beta) .def_readwrite("weights", &Weights::weights) .def_readwrite("positive", &Weights::positive) .def_readwrite("negative", &Weights::negative) @@ -584,7 +616,7 @@ void define_parameters(py::module& main) std::optional, std::optional, std::optional, std::optional, std::optional, std::optional, std::optional, std::optional, - std::optional, bool>(), + std::optional, bool, bool>(), py::arg("dim"), py::arg("modules") = std::nullopt, py::arg("target") = std::nullopt, @@ -600,7 +632,9 @@ void define_parameters(py::module& main) py::arg("cc") = std::nullopt, py::arg("cmu") = std::nullopt, py::arg("c1") = std::nullopt, - py::arg("verbose") = false) + py::arg("verbose") = false, + py::arg("always_compute_eigv") = false + ) .def_readonly("dim", &Settings::dim) .def_readonly("modules", &Settings::modules) .def_readwrite("target", &Settings::target) @@ -646,10 +680,15 @@ void define_parameters(py::module& main) using AdaptationType = std::variant< std::shared_ptr, - std::shared_ptr, + std::shared_ptr, std::shared_ptr, std::shared_ptr, - std::shared_ptr>; + std::shared_ptr, + std::shared_ptr, 
+ std::shared_ptr, + std::shared_ptr, + std::shared_ptr + >; py::class_>(main, "Parameters") .def(py::init(), py::arg("dimension")) @@ -675,6 +714,14 @@ void define_parameters(py::module& main) return std::dynamic_pointer_cast(self.adaptation); case MatrixAdaptationType::ONEPLUSONE: return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::CHOLESKY: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::CMSA: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::COVARIANCE_NO_EIGV: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::NATURAL_GRADIENT: + return std::dynamic_pointer_cast(self.adaptation); default: case MatrixAdaptationType::COVARIANCE: return std::dynamic_pointer_cast(self.adaptation); @@ -768,60 +815,57 @@ void define_mutation(py::module& main) py::class_>(m, "SigmaSampler") .def(py::init(), py::arg("dimension")) - .def_readwrite("beta", &SigmaSampler::beta) - .def("sample", &SigmaSampler::sample, py::arg("sigma"), py::arg("population")); + .def("sample", &SigmaSampler::sample, py::arg("sigma"), py::arg("population"), py::arg("tau")); py::class_>(m, "NoSigmaSampler") .def(py::init(), py::arg("dimension")); py::class_>(m, "Strategy") + .def( + py::init< + std::shared_ptr, + std::shared_ptr, + std::shared_ptr, + Float + >(), + py::arg("threshold_convergence"), + py::arg("sequential_selection"), + py::arg("sigma_sampler"), + py::arg("sigma0")) .def("adapt", &Strategy::adapt, py::arg("weights"), py::arg("dynamic"), py::arg("population"), py::arg("old_population"), py::arg("stats"), py::arg("lamb")) + .def( + "mutate", &CSA::mutate, py::arg("objective"), + py::arg("n_offspring"), + py::arg("parameters")) .def_readwrite("threshold_convergence", &Strategy::tc) .def_readwrite("sequential_selection", &Strategy::sq) .def_readwrite("sigma_sampler", &Strategy::ss) .def_readwrite("sigma", &Strategy::sigma) - .def_readwrite("s", &Strategy::s); - - py::class_>(m, "CSA") - .def( - py::init, std::shared_ptr, std::shared_ptr, Float>(), - py::arg("threshold_convergence"), - py::arg("sequential_selection"), - py::arg("sigma_sampler"), - py::arg("sigma0")) - .def( - "mutate", &CSA::mutate, py::arg("objective"), - py::arg("n_offspring"), - py::arg("parameters")); + .def_readwrite("s", &Strategy::s) + ; - py::class_>(m, "TPA") + py::class_>(m, "CSA"); + py::class_>(m, "TPA") .def_readwrite("a_tpa", &TPA::a_tpa) .def_readwrite("b_tpa", &TPA::b_tpa) .def_readwrite("rank_tpa", &TPA::rank_tpa); - py::class_>(m, "MSR") - ; - - py::class_>(m, "PSR") + py::class_>(m, "MSR"); + py::class_>(m, "PSR") .def_readwrite("success_ratio", &PSR::success_ratio); - py::class_>(m, "XNES") - ; + py::class_>(m, "XNES"); + py::class_>(m, "MXNES"); + py::class_>(m, "LPXNES"); + py::class_>(m, "SR"); + py::class_>(m, "SA"); - py::class_>(m, "MXNES") - ; - - py::class_>(m, "LPXNES") - ; - py::class_>(m, "SR") - // .def_staticreadwrite("tgt_success_ratio", &SR::tgt_success_ratio) - ; } void define_population(py::module& main) @@ -840,7 +884,8 @@ void define_population(py::module& main) .def_readwrite("f", &Population::f) .def_readwrite("s", &Population::s) .def_readwrite("d", &Population::d) - .def_readwrite("n", &Population::n); + .def_readwrite("n", &Population::n) + .def_readwrite("t", &Population::t); } class constants_w @@ -875,11 +920,11 @@ void define_constants(py::module& m) [](py::object, bool a) { constants::clip_sigma = a; }) .def_property_static( - "calc_eigv", + "use_box_muller", [](py::object) - { return 
constants::calc_eigv; }, + { return constants::use_box_muller; }, [](py::object, bool a) - { constants::calc_eigv = a; }) + { constants::use_box_muller = a; }) ; } diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 86d6e38..9e4010b 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -148,7 +148,7 @@ namespace matrix_adaptation const auto& popY = m.active ? pop.Y : pop.Y.leftCols(mu); const auto decay_c = (1 - (w.c1 * dhs) - w.c1 - (w.cmu * weights.sum())); - for (auto j = 0; j < settings.dim; j++) + for (size_t j = 0; j < settings.dim; j++) { const auto rank_mu = (popY.row(j).array().pow(2) * weights.transpose().array()).sum(); c(j) = (decay_c * c(j)) + (w.c1 * pow(pc(j), 2)) + (w.cmu * rank_mu); @@ -214,17 +214,17 @@ namespace matrix_adaptation bool MatrixAdaptation::adapt_matrix(const Weights& w, const Modules& m, const Population& pop, const size_t mu, const Settings& settings, parameters::Stats& stats) { - constexpr Float epsilon = 1e-10; - + stats.last_update = stats.t; stats.n_updates++; - + const auto& weights = m.active ? w.weights.topRows(pop.Z.cols()) : w.positive; const auto& popZ = m.active ? pop.Z : pop.Z.leftCols(mu); const auto& popY = m.active ? pop.Y : pop.Y.leftCols(mu); - + // Normal MA-ES -> O(n^3) // + // constexpr Float epsilon = 1e-10; // const auto& Z = popZ * weights.asDiagonal() * popZ.transpose(); // ZwI.noalias() = (w.cmu / 2.0) * (Z - I); // ssI.noalias() = (w.c1 / 2.0) * (ps * ps.transpose() - I); @@ -310,11 +310,11 @@ namespace matrix_adaptation A *= std::sqrt(1 - w.c1 - w.cmu); Eigen::internal::llt_rank_update_lower(A, pc, w.c1); - for (auto i = 0; i < mu; i++) + for (size_t i = 0; i < mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(i), w.cmu * w.positive(i)); if (m.active) - for (auto i = 0; i < pop.Y.cols() - mu; i++) + for (size_t i = 0; i < pop.Y.cols() - mu; i++) Eigen::internal::llt_rank_update_lower(A, pop.Y.col(mu + i), w.cmu * w.negative(i)); diff --git a/src/repelling.cpp b/src/repelling.cpp index ea6e5a9..e10700a 100644 --- a/src/repelling.cpp +++ b/src/repelling.cpp @@ -97,7 +97,7 @@ namespace repelling { return a.criticality > b.criticality; }); //! 
If it is not intialized - if (C_inv.cols() != p.settings.dim) + if (static_cast(C_inv.cols()) != p.settings.dim) { C_inv = Matrix::Identity(p.settings.dim, p.settings.dim); } From 35b876c4a3d32e1d64a17eeee90778857e0ec83a Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 13:38:01 +0200 Subject: [PATCH 28/74] change default numeric type back to double --- CMakeLists.txt | 2 +- include/common.hpp | 4 ++-- src/main.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a40971..ebe15e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ target_include_directories(main PUBLIC ) if (MSVC) - target_compile_options(main PRIVATE /bigobj) + target_compile_options(main PRIVATE /bigobj /openmp) else() target_compile_options(main PRIVATE -march=native) diff --git a/include/common.hpp b/include/common.hpp index 5d0bd13..905a350 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -23,7 +23,7 @@ #include #include -using Float = long double; +using Float = double; using Matrix = Eigen::Matrix; using Vector = Eigen::Matrix; using Array = Eigen::Array; @@ -338,8 +338,8 @@ namespace rng namespace functions { Float sphere(const Vector &x); - Float rastrigin(const Vector &x); Float ellipse(const Vector& x); + Float rastrigin(const Vector &x); Float rosenbrock(const Vector& x); Matrix random_rotation_matrix(int n, int seed); diff --git a/src/main.cpp b/src/main.cpp index 2f52a54..f64e903 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,7 +11,7 @@ using std::chrono::milliseconds; static int dim = 100; static bool rotated = false; -static size_t budget = dim * 10000; +static size_t budget = dim * 2000; struct Ellipse @@ -75,7 +75,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc -std::numeric_limits::infinity(), std::nullopt, budget, - 0.1 + 2.0 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); From 6df659abd1e734baebc6ac27c817adf3d86c0382 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 17:35:30 +0200 Subject: [PATCH 29/74] fix stagnation criteria --- CMakeLists.txt | 2 +- include/restart_criteria.hpp | 13 +++++++++++-- include/settings.hpp | 6 +++--- src/main.cpp | 29 +++++++++++++++++++---------- src/mutation.cpp | 6 ++++++ src/parameters.cpp | 2 ++ src/restart_criteria.cpp | 17 +++++++++++++---- 7 files changed, 55 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ebe15e6..7a40971 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ target_include_directories(main PUBLIC ) if (MSVC) - target_compile_options(main PRIVATE /bigobj /openmp) + target_compile_options(main PRIVATE /bigobj) else() target_compile_options(main PRIVATE -march=native) diff --git a/include/restart_criteria.hpp b/include/restart_criteria.hpp index e77562d..a0b0f77 100644 --- a/include/restart_criteria.hpp +++ b/include/restart_criteria.hpp @@ -35,7 +35,7 @@ namespace restart void update(const parameters::Parameters &p) { - for (const auto &c : items) + for (auto &c : items) { c->update(p); } @@ -43,7 +43,7 @@ namespace restart void reset(const parameters::Parameters &p) { - for (const auto &c : items) + for (auto &c : items) c->reset(p); } @@ -55,6 +55,15 @@ namespace restart return false; } + std::string reason() const + { + std::string res; + for (const auto& c : items) + if (c->met) + res += c->name + "; "; + return res; + } + vCriteria items; static Criteria get(const parameters::Modules modules); diff --git 
a/include/settings.hpp b/include/settings.hpp index b93104a..9777ba8 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -43,7 +43,7 @@ namespace parameters std::optional cc = std::nullopt, std::optional cmu = std::nullopt, std::optional c1 = std::nullopt, - bool verbose = false, + bool verbose = true, bool always_compute_eigv = false ) : dim(dim), modules(mod.value_or(Modules())), @@ -54,8 +54,8 @@ namespace parameters lambda0(lambda.value_or(4 + std::floor(3 * std::log(dim)))), mu0(mu.value_or(lambda0 / 2)), x0(x0), - lb(lb.value_or(Vector::Ones(dim) * -std::numeric_limits::infinity())), - ub(ub.value_or(Vector::Ones(dim)* std::numeric_limits::infinity())), + lb(lb.value_or(Vector::Ones(dim) * -5)), + ub(ub.value_or(Vector::Ones(dim)* 5)), cs(cs), cc(cc), cmu(cmu), diff --git a/src/main.cpp b/src/main.cpp index f64e903..85db5d9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,9 +9,9 @@ using std::chrono::duration_cast; using std::chrono::duration; using std::chrono::milliseconds; -static int dim = 100; -static bool rotated = false; -static size_t budget = dim * 2000; +static int dim = 5; +static bool rotated = true; +static size_t budget = dim * 100000; struct Ellipse @@ -61,13 +61,20 @@ struct Timer void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunction fun_t, parameters::StepSizeAdaptation ssa) { - rng::set_seed(42); + rng::set_seed(412); parameters::Modules m; m.matrix_adaptation = mat_t; m.elitist = false; m.active = false; m.ssa = ssa; - //m.weights = parameters::RecombinationWeights::EQUAL; + m.sampler = parameters::BaseSampler::SOBOL; + m.bound_correction = parameters::CorrectionMethod::SATURATE; + m.restart_strategy = parameters::RestartStrategyType::RESTART; + m.orthogonal = true; + m.sample_transformation = parameters::SampleTranformerType::CAUCHY; + m.sequential_selection = true; + m.threshold_convergence = true; + m.weights = parameters::RecombinationWeights::HALF_POWER_LAMBDA; parameters::Settings settings( dim, @@ -75,7 +82,9 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc -std::numeric_limits::infinity(), std::nullopt, budget, - 2.0 + 2.0, + 130, + 23 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); @@ -85,7 +94,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc while (cma.step(f)) { //if (cma.p->stats.global_best.y < 1e-9) - //break; + // break;*/ } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; @@ -109,8 +118,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { - auto ft = functions::ELLIPSE; - auto ssa = parameters::StepSizeAdaptation::CSA; + auto ft = functions::ROSENBROCK; + auto ssa = parameters::StepSizeAdaptation::MXNES; //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); @@ -120,6 +129,6 @@ int main() //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/mutation.cpp b/src/mutation.cpp index 07676de..6bc3593 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -150,7 +150,13 @@ namespace mutation const Population& old_pop, 
const parameters::Stats& stats, const size_t lambda) { const Float delta = (w.mueff * adaptation->dz.squaredNorm() - adaptation->dd); + sigma *= std::exp((w.cs / adaptation->dd) * delta); + if (!std::isfinite(sigma)) + { + std::cout << stats.t << ": "; + std::cout << sigma; + } } void LPXNES::adapt(const parameters::Weights& w, std::shared_ptr adaptation, diff --git a/src/parameters.cpp b/src/parameters.cpp index 298347f..7c76507 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -39,6 +39,8 @@ namespace parameters void Parameters::perform_restart(FunctionType& objective, const std::optional& sigma) { + std::cout << "performing restart. reason: "<< criteria.reason() << std::endl; + stats.solutions.push_back(stats.current_best); stats.evaluations++; stats.centers.emplace_back(adaptation->m, objective(adaptation->m), stats.t - 1, stats.evaluations); diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index 8a22de7..806e176 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -21,6 +21,9 @@ namespace // TODO: this is duplicate code Float median(const Vector &x) { + if (x.size() == 1) + return x(0); + if (x.size() % 2 == 0) return (x(x.size() / 2) + x(x.size() / 2 - 1)) / 2.0; return x(x.size() / 2); @@ -28,6 +31,9 @@ namespace Float median(const std::vector &v, const size_t from, const size_t to) { + if (v.size() == 1) + return v[0]; + const size_t n = to - from; if (n % 2 == 0) return (v[from + (n / 2)] + v[from + (n / 2) - 1]) / 2.0; @@ -80,6 +86,7 @@ namespace restart void UnableToAdapt::update(const parameters::Parameters &p) { + //std::cout << p.stats.t << ", "<< p.stats.evaluations << ": " << std::boolalpha << p.successfull_adaptation << ": " << p.mutation->sigma << '\n'; met = !p.successfull_adaptation or !std::isfinite(p.mutation->sigma); } @@ -174,10 +181,12 @@ namespace restart median_fitnesses.push_back(median(p.pop.f)); best_fitnesses.push_back(p.pop.f(0)); - const bool best_better = median(best_fitnesses, pt, time_since_restart) >= median(best_fitnesses, 0, pt); - const bool median_better = median(median_fitnesses, pt, time_since_restart) >= median(median_fitnesses, 0, pt); - - met = time_since_restart > n_stagnation and (best_better and median_better); + if (time_since_restart > n_stagnation) + { + const bool best_better = median(best_fitnesses, pt, time_since_restart) >= median(best_fitnesses, 0, pt); + const bool median_better = median(median_fitnesses, pt, time_since_restart) >= median(median_fitnesses, 0, pt); + met = best_better and median_better; + } } void Stagnation::on_reset(const parameters::Parameters &p) From 2768c67c6abb85932eb513db6d38ecdd98f9d568 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 17:36:13 +0200 Subject: [PATCH 30/74] add reason --- src/interface.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/interface.cpp b/src/interface.cpp index 1eca541..9f42c70 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -1024,6 +1024,7 @@ void define_restart_criteria(py::module& main) .def_readwrite("items", &Criteria::items) .def("reset", &Criteria::reset, py::arg("parameters")) .def("update", &Criteria::update, py::arg("parameters")) + .def("reason", &Criteria::reason) .def("any", &Criteria::any); } From 4e3f0084fc7314c2444beae250f7a27bef211a8f Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 17:38:14 +0200 Subject: [PATCH 31/74] remove debug --- src/mutation.cpp | 5 ----- src/restart_criteria.cpp | 1 - 2 files changed, 6 deletions(-) diff --git a/src/mutation.cpp 
b/src/mutation.cpp index 6bc3593..6708037 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -152,11 +152,6 @@ namespace mutation const Float delta = (w.mueff * adaptation->dz.squaredNorm() - adaptation->dd); sigma *= std::exp((w.cs / adaptation->dd) * delta); - if (!std::isfinite(sigma)) - { - std::cout << stats.t << ": "; - std::cout << sigma; - } } void LPXNES::adapt(const parameters::Weights& w, std::shared_ptr adaptation, diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index 806e176..6980f96 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -86,7 +86,6 @@ namespace restart void UnableToAdapt::update(const parameters::Parameters &p) { - //std::cout << p.stats.t << ", "<< p.stats.evaluations << ": " << std::boolalpha << p.successfull_adaptation << ": " << p.mutation->sigma << '\n'; met = !p.successfull_adaptation or !std::isfinite(p.mutation->sigma); } From 402f6ad8836c52e35f5d108b65a0428731ced82f Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 17:38:30 +0200 Subject: [PATCH 32/74] add message if verbose --- src/parameters.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parameters.cpp b/src/parameters.cpp index 7c76507..7e0fba4 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -39,7 +39,8 @@ namespace parameters void Parameters::perform_restart(FunctionType& objective, const std::optional& sigma) { - std::cout << "performing restart. reason: "<< criteria.reason() << std::endl; + if (settings.verbose) + std::cout << "performing restart. reason: "<< criteria.reason() << std::endl; stats.solutions.push_back(stats.current_best); stats.evaluations++; From 499083beecf30d8d6db73b9ffd62e5a63f14d3df Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Wed, 4 Jun 2025 18:30:23 +0200 Subject: [PATCH 33/74] fix SA with elitist ->use RCMSA adaptation --- include/modules.hpp | 2 +- include/mutation.hpp | 5 +++++ src/main.cpp | 45 ++++++++++++++++++++++++++++++---------- src/mutation.cpp | 9 +++++++- src/parameters.cpp | 3 ++- src/restart_criteria.cpp | 19 ++++++++++++----- 6 files changed, 64 insertions(+), 19 deletions(-) diff --git a/include/modules.hpp b/include/modules.hpp index 2efa09f..23898ad 100644 --- a/include/modules.hpp +++ b/include/modules.hpp @@ -62,8 +62,8 @@ namespace parameters enum class RestartStrategyType { NONE, - STOP, RESTART, + STOP, IPOP, BIPOP }; diff --git a/include/mutation.hpp b/include/mutation.hpp index 49fea69..ec2d80a 100644 --- a/include/mutation.hpp +++ b/include/mutation.hpp @@ -188,6 +188,11 @@ namespace mutation void adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) override; + + void mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) override; + + private: + Float mean_sigma; }; diff --git a/src/main.cpp b/src/main.cpp index 85db5d9..81f8614 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -64,9 +64,10 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc rng::set_seed(412); parameters::Modules m; m.matrix_adaptation = mat_t; - m.elitist = false; - m.active = false; m.ssa = ssa; + + /*m.elitist = false; + m.active = false; m.sampler = parameters::BaseSampler::SOBOL; m.bound_correction = parameters::CorrectionMethod::SATURATE; m.restart_strategy = parameters::RestartStrategyType::RESTART; @@ -74,8 +75,24 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc 
m.sample_transformation = parameters::SampleTranformerType::CAUCHY; m.sequential_selection = true; m.threshold_convergence = true; - m.weights = parameters::RecombinationWeights::HALF_POWER_LAMBDA; + m.weights = parameters::RecombinationWeights::HALF_POWER_LAMBDA;*/ + m.elitist = true; + m.active = true; + m.sampler = parameters::BaseSampler::UNIFORM; + m.bound_correction = parameters::CorrectionMethod::SATURATE; + m.restart_strategy = parameters::RestartStrategyType::STOP; + m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; + m.orthogonal = true; + m.sequential_selection = true; + m.weights = parameters::RecombinationWeights::EQUAL; + m.mirrored = parameters::Mirror::PAIRWISE; + /* + "c1" : 0.1659166344465, + "cc" : 0.2310662705758, + "cmu" : 0.2024033500315, + "cs" : 0.2789599486741, + */ parameters::Settings settings( dim, m, @@ -83,8 +100,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc std::nullopt, budget, 2.0, - 130, - 23 + 43, + 13 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); @@ -93,8 +110,14 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc FunctionType f = Ellipse(dim, rotated, fun_t); while (cma.step(f)) { - //if (cma.p->stats.global_best.y < 1e-9) - // break;*/ + + std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; + std::cout << "iters: " << cma.p->stats.t << ": "; + std::cout << "sigma: " << cma.p->mutation->sigma << ": "; + std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; + + if (cma.p->stats.global_best.y < 1e-9) + break; } std::cout << "modcmaes: " << parameters::to_string(mat_t) << std::defaultfloat; @@ -118,17 +141,17 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { - auto ft = functions::ROSENBROCK; - auto ssa = parameters::StepSizeAdaptation::MXNES; + auto ft = functions::SPHERE; + auto ssa = parameters::StepSizeAdaptation::LPXNES; - //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/mutation.cpp b/src/mutation.cpp index 6708037..b403057 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -169,12 +169,19 @@ namespace mutation sigma *= std::exp((1 / w.damps) * ((stats.success_ratio - tgt_success_ratio) / (1.0 - tgt_success_ratio))); } + + void SA::mutate(FunctionType& objective, const size_t n_offspring, parameters::Parameters& p) + { + Strategy::mutate(objective, n_offspring, p); + mean_sigma = std::exp(p.pop.s.array().log().mean()); + } + void SA::adapt(const parameters::Weights& w, std::shared_ptr adaptation, Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { const auto& sigma_l = pop.s.topRows(w.positive.rows()); - sigma = std::exp((w.positive.array() * sigma_l.array().log()).sum()); + sigma = std::exp((w.positive.array() * sigma_l.array().log()).sum()) / mean_sigma; } diff --git a/src/parameters.cpp 
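// A self-contained sketch of the SA (self-adaptation) recombination introduced above,
// assuming the population is already sorted by fitness: the new global sigma is the
// weighted geometric mean of the best offspring step sizes, normalised by the geometric
// mean of all sampled step sizes (mean_sigma in the patch). The weights and step sizes
// below are placeholders.
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main()
{
    Eigen::VectorXd s(4);                 // per-offspring step sizes, best-ranked first
    s << 1.8, 2.2, 1.9, 2.6;
    Eigen::VectorXd w(2);                 // positive recombination weights for the mu best
    w << 0.7, 0.3;

    const double mean_sigma = std::exp(s.array().log().mean());
    const Eigen::VectorXd sigma_l = s.topRows(w.rows());
    const double sigma = std::exp((w.array() * sigma_l.array().log()).sum()) / mean_sigma;
    std::cout << "recombined sigma: " << sigma << "\n";
}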
b/src/parameters.cpp index 7e0fba4..0d8248e 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -40,7 +40,8 @@ namespace parameters void Parameters::perform_restart(FunctionType& objective, const std::optional& sigma) { if (settings.verbose) - std::cout << "performing restart. reason: "<< criteria.reason() << std::endl; + std::cout << "performing restart. t(" << stats.t <<", " << stats.evaluations + << ") reason: " << criteria.reason() << std::endl; stats.solutions.push_back(stats.current_best); stats.evaluations++; diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index 6980f96..376869d 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -66,12 +66,16 @@ namespace restart n_bin = 10 + static_cast(std::ceil(30 * static_cast(p.settings.dim) / static_cast(p.lambda))); } - void NoImprovement::update(const parameters::Parameters &p) + void NoImprovement::update(const parameters::Parameters& p) { const size_t time_since_restart = p.stats.t - last_restart; best_fitnesses.push_back(p.pop.f(0)); - const auto recent_improvement = ptp_tail(best_fitnesses, n_bin); - met = time_since_restart > n_bin and recent_improvement == 0; + met = false; + if (time_since_restart > n_bin) + { + const auto recent_improvement = ptp_tail(best_fitnesses, n_bin); + met = recent_improvement == 0; + } } void MaxSigma::update(const parameters::Parameters &p) @@ -93,8 +97,12 @@ namespace restart { const size_t time_since_restart = p.stats.t - last_restart; flat_fitnesses(p.stats.t % p.settings.dim) = p.pop.f(0) == p.pop.f(flat_fitness_index); - const size_t n_flat_fitness = static_cast(flat_fitnesses.sum()); - met = time_since_restart > static_cast(flat_fitnesses.size()) and n_flat_fitness > max_flat_fitness; + met = false; + if (time_since_restart > static_cast(flat_fitnesses.size())) + { + const size_t n_flat_fitness = static_cast(flat_fitnesses.sum()); + met = n_flat_fitness > max_flat_fitness; + } } void FlatFitness::on_reset(const parameters::Parameters &p) @@ -180,6 +188,7 @@ namespace restart median_fitnesses.push_back(median(p.pop.f)); best_fitnesses.push_back(p.pop.f(0)); + met = false; if (time_since_restart > n_stagnation) { const bool best_better = median(best_fitnesses, pt, time_since_restart) >= median(best_fitnesses, 0, pt); From aa14504f65fb516394935dad58393dad2d177a94 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 5 Jun 2025 11:13:30 +0200 Subject: [PATCH 34/74] fix cotn --- include/bounds.hpp | 14 +++++++------- src/bounds.cpp | 12 ++++++------ src/mutation.cpp | 4 ++++ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/bounds.hpp b/include/bounds.hpp index 82247e8..95cadd7 100644 --- a/include/bounds.hpp +++ b/include/bounds.hpp @@ -40,7 +40,7 @@ namespace bounds void correct(const Eigen::Index i, parameters::Parameters& p); - virtual Vector correct_x(const Vector& xi, const Mask& oob) = 0; + virtual Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) = 0; [[nodiscard]] Mask is_out_of_bounds(const Vector& xi) const; @@ -56,7 +56,7 @@ namespace bounds { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector& xi, const Mask& oob) override + Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) override { return xi; } @@ -73,14 +73,14 @@ namespace bounds COTN(Eigen::Ref lb, Eigen::Ref ub) : BoundCorrection(lb, ub), sampler(static_cast(lb.size()), rng::normal(0, 1.0 / 3.)) {} - Vector correct_x(const Vector& xi, const Mask& oob) override; + Vector correct_x(const Vector& xi, const 
Mask& oob, const Float sigma) override; }; struct Mirror final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector& xi, const Mask& oob) override; + Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) override; }; struct UniformResample final : BoundCorrection @@ -89,21 +89,21 @@ namespace bounds UniformResample(Eigen::Ref lb, Eigen::Ref ub) : BoundCorrection(lb, ub), sampler(static_cast(lb.size())) {} - Vector correct_x(const Vector& xi, const Mask& oob) override; + Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) override; }; struct Saturate final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector& xi, const Mask& oob) override; + Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) override; }; struct Toroidal final : BoundCorrection { using BoundCorrection::BoundCorrection; - Vector correct_x(const Vector& xi, const Mask& oob) override; + Vector correct_x(const Vector& xi, const Mask& oob, const Float sigma) override; }; inline std::shared_ptr get(const parameters::CorrectionMethod& m, const Vector& lb, const Vector& ub) diff --git a/src/bounds.cpp b/src/bounds.cpp index d5d8340..950ade3 100644 --- a/src/bounds.cpp +++ b/src/bounds.cpp @@ -46,15 +46,15 @@ namespace bounds } } - Vector COTN::correct_x(const Vector& xi, const Mask& oob) + Vector COTN::correct_x(const Vector& xi, const Mask& oob, const Float sigma) { const Vector y = delta_out_of_bounds(xi, oob); return (oob).select( - lb.array() + db.array() * ((y.array() > 0).cast() - sampler().array().abs()).abs(), y); + lb.array() + db.array() * ((y.array() > 0).cast() - (sigma * sampler().array().abs())).abs(), y); } - Vector Mirror::correct_x(const Vector& xi, const Mask& oob) + Vector Mirror::correct_x(const Vector& xi, const Mask& oob, const Float sigma) { const Vector y = delta_out_of_bounds(xi, oob); return (oob).select( @@ -64,19 +64,19 @@ namespace bounds } - Vector UniformResample::correct_x(const Vector& xi, const Mask& oob) + Vector UniformResample::correct_x(const Vector& xi, const Mask& oob, const Float sigma) { return (oob).select(lb + sampler().cwiseProduct(db), xi); } - Vector Saturate::correct_x(const Vector& xi, const Mask& oob) + Vector Saturate::correct_x(const Vector& xi, const Mask& oob, const Float sigma) { const Vector y = delta_out_of_bounds(xi, oob); return (oob).select( lb.array() + db.array() * (y.array() > 0).cast(), y); } - Vector Toroidal::correct_x(const Vector& xi, const Mask& oob) + Vector Toroidal::correct_x(const Vector& xi, const Mask& oob, const Float sigma) { const Vector y = delta_out_of_bounds(xi, oob); return (oob).select( diff --git a/src/mutation.cpp b/src/mutation.cpp index b403057..4fcce05 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -42,7 +42,11 @@ namespace mutation p.pop.f(i) = objective(p.pop.X.col(i)); p.stats.evaluations++; if (sq->break_conditions(i, p.pop.f(i), p.stats.global_best.y, p.settings.modules.mirrored)) + { + // TODO: We should renormalize the weights break; + } + } } From cee523c28cbd51c1443c9d750efcab28daf69521 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 5 Jun 2025 11:16:42 +0200 Subject: [PATCH 35/74] rename half_power_lambda --- include/modules.hpp | 2 +- include/to_string.hpp | 4 ++-- include/weights.hpp | 2 +- src/weights.cpp | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/modules.hpp b/include/modules.hpp index 23898ad..8d3f98c 100644 --- a/include/modules.hpp +++ 
b/include/modules.hpp @@ -6,7 +6,7 @@ namespace parameters { DEFAULT, EQUAL, - HALF_POWER_LAMBDA + EXPONENTIAL }; enum class BaseSampler diff --git a/include/to_string.hpp b/include/to_string.hpp index e4a059f..b643ac5 100644 --- a/include/to_string.hpp +++ b/include/to_string.hpp @@ -11,8 +11,8 @@ namespace parameters { case RecombinationWeights::EQUAL: return "EQUAL"; - case RecombinationWeights::HALF_POWER_LAMBDA: - return "HALF_POWER_LAMBDA"; + case RecombinationWeights::EXPONENTIAL: + return "EXPONENTIAL"; default: case RecombinationWeights::DEFAULT: return "DEFAULT"; diff --git a/include/weights.hpp b/include/weights.hpp index 153c4b1..7658794 100644 --- a/include/weights.hpp +++ b/include/weights.hpp @@ -26,7 +26,7 @@ namespace parameters void weights_equal(const size_t mu); - void weights_half_power_lambda(const size_t mu, const size_t lambda); + void weights_exponential(const size_t mu, const size_t lambda); Vector clipped() const; }; diff --git a/src/weights.cpp b/src/weights.cpp index 0a7516b..f591eeb 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -57,8 +57,8 @@ namespace parameters case RecombinationWeights::EQUAL: weights_equal(mu); break; - case RecombinationWeights::HALF_POWER_LAMBDA: - weights_half_power_lambda(mu, lambda); + case RecombinationWeights::EXPONENTIAL: + weights_exponential(mu, lambda); break; case RecombinationWeights::DEFAULT: weights_default(mu, lambda); @@ -129,7 +129,7 @@ namespace parameters negative.setConstant(-wi); } - void Weights::weights_half_power_lambda(const size_t mu, const size_t lambda) + void Weights::weights_exponential(const size_t mu, const size_t lambda) { const Float dmu = static_cast(mu); const Float base = (1.0 / pow(2.0, dmu)) / dmu; From 8e55ef8b8dba0137a740a983471c8da0ac2448de Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 5 Jun 2025 13:03:10 +0200 Subject: [PATCH 36/74] fix COTN, fix box muller, add active to xNES --- include/matrix_adaptation.hpp | 78 +++++++++++----------- include/sampling.hpp | 22 +++++-- include/settings.hpp | 5 -- src/bounds.cpp | 2 +- src/es.cpp | 4 +- src/main.cpp | 39 +++++------ src/matrix_adaptation.cpp | 120 +++++++++++++++++++--------------- src/mutation.cpp | 12 +++- src/parameters.cpp | 8 ++- src/weights.cpp | 4 ++ 10 files changed, 164 insertions(+), 130 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 07b6466..3b0a508 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -20,12 +20,12 @@ namespace matrix_adaptation expected_length_z(expected_length_z) { } - + void adapt_evolution_paths(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, size_t mu, size_t lambda); + const parameters::Stats& stats, const parameters::Settings& settings, size_t lambda, size_t mu); virtual void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, size_t mu, size_t lambda) = 0; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) = 0; virtual bool adapt_matrix( const parameters::Weights& w, const parameters::Modules& m, const Population& pop, @@ -37,7 +37,7 @@ namespace matrix_adaptation virtual Vector invert_y(const Vector&) = 0; - virtual void restart(const parameters::Settings& settings) + virtual void restart(const parameters::Settings& settings, const Float sigma) { m = settings.x0.value_or(Vector::Zero(settings.dim)); m_old.setZero(); @@ -60,11 +60,9 @@ namespace matrix_adaptation return 
true; } - void adapt_evolution_paths_inner( - const Population& pop, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; Vector compute_y(const Vector&) override; @@ -98,13 +96,12 @@ namespace matrix_adaptation virtual void adapt_ps(const parameters::Weights& w); void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -125,13 +122,12 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -146,8 +142,7 @@ namespace matrix_adaptation using CovarianceAdaptation::CovarianceAdaptation; void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; @@ -168,13 +163,12 @@ namespace matrix_adaptation } void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -197,17 +191,14 @@ namespace matrix_adaptation { } - void adapt_evolution_paths_inner( - const Population& pop, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda - ) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const 
parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -225,17 +216,14 @@ namespace matrix_adaptation C(Matrix::Identity(dim, dim)) {} - void adapt_evolution_paths_inner( - const Population& pop, + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda - ) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -259,26 +247,34 @@ namespace matrix_adaptation Matrix A; Matrix G; Matrix A_inv; + Float sigma_g; - NaturalGradientAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z) + NaturalGradientAdaptation(const size_t dim, const Vector& x0, const Float expected_length_z, const Float sigma0) : Adaptation(dim, x0, Vector::Ones(dim), expected_length_z), - A(Matrix::Identity(dim, dim)), + A(Matrix::Identity(dim, dim) / sigma0), G(Matrix::Zero(dim, dim)), A_inv(Matrix::Identity(dim, dim)), + sigma_g(0), outdated_A_inv(false) {} - void adapt_evolution_paths_inner( - const Population& pop, + void compute_gradients( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + const parameters::Settings& settings, + size_t mu, + size_t lambda + ); + + void adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda - ) override; + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) override; bool adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) override; - void restart(const parameters::Settings& settings) override; + void restart(const parameters::Settings& settings, const Float sigma) override; Vector compute_y(const Vector&) override; @@ -291,7 +287,7 @@ namespace matrix_adaptation - inline std::shared_ptr get(const parameters::Modules& m, const size_t dim, const Vector& x0, const Float expected_z) + inline std::shared_ptr get(const parameters::Modules& m, const size_t dim, const Vector& x0, const Float expected_z, const Float sigma0) { using namespace parameters; switch (m.matrix_adaptation) @@ -311,7 +307,7 @@ namespace matrix_adaptation case MatrixAdaptationType::COVARIANCE_NO_EIGV: return std::make_shared(dim, x0, expected_z); case MatrixAdaptationType::NATURAL_GRADIENT: - return std::make_shared(dim, x0, expected_z); + return std::make_shared(dim, x0, expected_z, sigma0); default: case MatrixAdaptationType::COVARIANCE: return std::make_shared(dim, x0, expected_z); diff --git a/include/sampling.hpp b/include/sampling.hpp index 88bbd3f..9f41fdb 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -261,6 +261,14 @@ namespace sampling return sqrt(dd) * (1.0 - 1.0 / (4.0 * dd) + 1.0 / (21.0 * pow(dd, 2.0))); } + [[nodiscard]] inline std::pair box_muller(const Float u1, 
const Float u2) + { + const Float r = std::sqrt(-2.0 * std::log(u1)); + const Float theta = 2.0 * M_PI * u2; + return { r * std::cos(theta), r * std::sin(theta) }; + } + + [[nodiscard]] inline Vector box_muller(const Vector& u) { size_t n = u.size(); @@ -268,13 +276,15 @@ namespace sampling Vector z(n); for (size_t i = 0; i < m; ++i) { - const Float r = std::sqrt(-2.0 * std::log(u(2 * i))); - const Float theta = 2.0 * M_PI * u(2 * i + 1); - - z(2 * i) = r * std::cos(theta); - z(2 * i + 1) = r * std::sin(theta); + const auto&[n1, n2] = box_muller(u(2 * i), u(2 * i + 1)); + z(2 * i) = n1; + z(2 * i + 1) = n2; } - return z.head(n % 2 == 0 ? n : n - 1); + + if (n % 2 != 0) + z(n - 1) = box_muller(u(0), u(n - 1)).first; + + return z; } diff --git a/include/settings.hpp b/include/settings.hpp index 9777ba8..beef56b 100644 --- a/include/settings.hpp +++ b/include/settings.hpp @@ -76,11 +76,6 @@ namespace parameters mu0 = std::min(lambda0 / 4, mu0); } - if (modules.matrix_adaptation == MatrixAdaptationType::NATURAL_GRADIENT) - { - modules.active = false; - } - if (modules.ssa != StepSizeAdaptation::CSA and modules.matrix_adaptation == MatrixAdaptationType::COVARIANCE and not always_compute_eigv diff --git a/src/bounds.cpp b/src/bounds.cpp index 950ade3..fefffc8 100644 --- a/src/bounds.cpp +++ b/src/bounds.cpp @@ -40,7 +40,7 @@ namespace bounds if (oob.any()) { n_out_of_bounds++; - p.pop.X.col(i) = correct_x(p.pop.X.col(i), oob); + p.pop.X.col(i) = correct_x(p.pop.X.col(i), oob, p.mutation->sigma); p.pop.Y.col(i) = p.adaptation->invert_x(p.pop.X.col(i), p.pop.s(i)); p.pop.Z.col(i) = p.adaptation->invert_y(p.pop.Y.col(i)); } diff --git a/src/es.cpp b/src/es.cpp index 3b7e563..bb35d11 100644 --- a/src/es.cpp +++ b/src/es.cpp @@ -15,7 +15,7 @@ namespace es const auto mask = corrector->is_out_of_bounds(x1); if (mask.any()) - x1 = corrector->correct_x(x1, mask); + x1 = corrector->correct_x(x1, mask, sigma); } while (rejection_sampling && n_rej++ < 5*d && bounds::any_out_of_bounds(x1, corrector->lb, corrector->ub) ); return x1; @@ -51,7 +51,7 @@ namespace es const auto mask = corrector->is_out_of_bounds(x); if (mask.any()) - x = corrector->correct_x(x, mask); + x = corrector->correct_x(x, mask, si.mean()); } while (rejection_sampling && n_rej++ < 5*d && bounds::any_out_of_bounds(x, corrector->lb, corrector->ub)); return x; diff --git a/src/main.cpp b/src/main.cpp index 81f8614..4ce7fec 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ using std::chrono::duration; using std::chrono::milliseconds; static int dim = 5; -static bool rotated = true; +static bool rotated = false; static size_t budget = dim * 100000; @@ -65,6 +65,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc parameters::Modules m; m.matrix_adaptation = mat_t; m.ssa = ssa; + m.active = true; /*m.elitist = false; m.active = false; @@ -77,16 +78,16 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc m.threshold_convergence = true; m.weights = parameters::RecombinationWeights::HALF_POWER_LAMBDA;*/ - m.elitist = true; - m.active = true; - m.sampler = parameters::BaseSampler::UNIFORM; - m.bound_correction = parameters::CorrectionMethod::SATURATE; - m.restart_strategy = parameters::RestartStrategyType::STOP; - m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; - m.orthogonal = true; - m.sequential_selection = true; - m.weights = parameters::RecombinationWeights::EQUAL; - m.mirrored = parameters::Mirror::PAIRWISE; + //m.elitist = true; + 
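// A standalone usage illustration of the Box-Muller helper added to include/sampling.hpp
// above: two independent uniforms in (0, 1) are mapped to two independent standard
// normal deviates via r = sqrt(-2 ln u1), theta = 2*pi*u2 (the vector overload pairs the
// entries and reuses u(0) for an odd-length tail). The uniform inputs below are
// placeholder values only.
#include <cmath>
#include <iostream>
#include <utility>

std::pair<double, double> box_muller(const double u1, const double u2)
{
    const double pi = std::acos(-1.0);
    const double r = std::sqrt(-2.0 * std::log(u1));
    const double theta = 2.0 * pi * u2;
    return { r * std::cos(theta), r * std::sin(theta) };
}

int main()
{
    const auto [n1, n2] = box_muller(0.37, 0.81);   // placeholder uniform draws
    std::cout << n1 << " " << n2 << "\n";           // two standard normal deviates
}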
//m.active = true; + //m.sampler = parameters::BaseSampler::UNIFORM; + //m.bound_correction = parameters::CorrectionMethod::SATURATE; + //m.restart_strategy = parameters::RestartStrategyType::STOP; + //m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; + //m.orthogonal = true; + //m.sequential_selection = true; + //m.weights = parameters::RecombinationWeights::EQUAL; + //m.mirrored = parameters::Mirror::PAIRWISE; /* "c1" : 0.1659166344465, "cc" : 0.2310662705758, @@ -111,10 +112,10 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc while (cma.step(f)) { - std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; - std::cout << "iters: " << cma.p->stats.t << ": "; - std::cout << "sigma: " << cma.p->mutation->sigma << ": "; - std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; + //std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; + //std::cout << "iters: " << cma.p->stats.t << ": "; + //std::cout << "sigma: " << cma.p->mutation->sigma << ": "; + //std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; if (cma.p->stats.global_best.y < 1e-9) break; @@ -142,16 +143,16 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::SPHERE; - auto ssa = parameters::StepSizeAdaptation::LPXNES; + auto ssa = parameters::StepSizeAdaptation::TPA; - run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::MATRIX, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - //run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 9e4010b..842ff28 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -10,13 +10,13 @@ namespace matrix_adaptation return (xi - m) / sigma; } - void Adaptation::adapt_evolution_paths(const Population& pop, const Weights& w, - const Stats& stats, const size_t mu, const size_t lambda) + void matrix_adaptation::Adaptation::adapt_evolution_paths(const Population& pop, const Weights& w, + const Stats& stats, const parameters::Settings& settings, const size_t lambda, const size_t mu) { const auto sigma = pop.s.mean(); - dm = (m - m_old) / sigma; + dm = (m - m_old) / sigma; dz = pop.Z.leftCols(mu) * w.positive.head(mu); - adapt_evolution_paths_inner(pop, w, stats, mu, lambda); + adapt_evolution_paths_inner(pop, w, stats, settings, mu, lambda); } void CovarianceAdaptation::adapt_ps(const Weights& w) @@ -24,9 +24,8 @@ namespace matrix_adaptation ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * inv_root_C * dm); } - - void CovarianceAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const Stats& stats, const size_t mu, const size_t lambda) + void matrix_adaptation::CovarianceAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + const Stats& stats, const parameters::Settings& settings, const size_t mu, const size_t lambda) { 
adapt_ps(w); const Float actual_ps_length = ps.norm() / sqrt( @@ -97,9 +96,9 @@ namespace matrix_adaptation } - void CovarianceAdaptation::restart(const Settings& settings) + void matrix_adaptation::CovarianceAdaptation::restart(const Settings& settings, const Float sigma) { - Adaptation::restart(settings); + Adaptation::restart(settings, sigma); B = Matrix::Identity(settings.dim, settings.dim); C = Matrix::Identity(settings.dim, settings.dim); A = Matrix::Identity(settings.dim, settings.dim); @@ -119,11 +118,9 @@ namespace matrix_adaptation } - void SeperableAdaptation::adapt_evolution_paths_inner( - const Population& pop, + void matrix_adaptation::SeperableAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); @@ -159,9 +156,9 @@ namespace matrix_adaptation return true; } - void SeperableAdaptation::restart(const parameters::Settings& settings) + void matrix_adaptation::SeperableAdaptation::restart(const parameters::Settings& settings, const Float sigma) { - Adaptation::restart(settings); + Adaptation::restart(settings, sigma); c.setOnes(); d.setOnes(); pc.setZero(); @@ -178,9 +175,8 @@ namespace matrix_adaptation } - void OnePlusOneAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda) + void matrix_adaptation::OnePlusOneAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, + const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) { if (!stats.has_improved) return; @@ -205,8 +201,8 @@ namespace matrix_adaptation - void MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const Stats& stats, const size_t mu, const size_t lambda) + void matrix_adaptation::MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + const Stats& stats, const parameters::Settings& settings, const size_t mu, const size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); } @@ -249,9 +245,9 @@ namespace matrix_adaptation return true; } - void MatrixAdaptation::restart(const Settings& settings) + void matrix_adaptation::MatrixAdaptation::restart(const Settings& settings, const Float sigma) { - Adaptation::restart(settings); + Adaptation::restart(settings, sigma); M = Matrix::Identity(settings.dim, settings.dim); M_inv = Matrix::Identity(settings.dim, settings.dim); outdated_M_inv = false; @@ -272,8 +268,8 @@ namespace matrix_adaptation } - void None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, - const Stats& stats, const size_t mu, const size_t lambda) + void matrix_adaptation::None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + const Stats& stats, const parameters::Settings& settings, const size_t mu, const size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); } @@ -289,18 +285,15 @@ namespace matrix_adaptation return yi; } - void CholeskyAdaptation::adapt_evolution_paths_inner( - const Population& pop, + void matrix_adaptation::CholeskyAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, - const parameters::Stats& stats, - size_t mu, size_t lambda - ) + const parameters::Stats& stats, const parameters::Settings& 
settings, size_t mu, size_t lambda) { pc = (1.0 - w.cc) * pc + (w.sqrt_cc_mueff) * dm; ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); } - bool CholeskyAdaptation::adapt_matrix(const parameters::Weights& w, const parameters::Modules& m, const Population& pop, size_t mu, + bool CholeskyAdaptation::adapt_matrix(const parameters::Weights & w, const parameters::Modules & m, const Population & pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) { @@ -321,9 +314,9 @@ namespace matrix_adaptation return true; } - void CholeskyAdaptation::restart(const parameters::Settings& settings) + void matrix_adaptation::CholeskyAdaptation::restart(const parameters::Settings& settings, const Float sigma) { - Adaptation::restart(settings); + Adaptation::restart(settings, sigma); A = Matrix::Identity(settings.dim, settings.dim); pc.setZero(); } @@ -338,7 +331,7 @@ namespace matrix_adaptation return A.triangularView().solve(yi); } - void SelfAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) + void matrix_adaptation::SelfAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * A.triangularView().solve(dm)); } @@ -371,7 +364,7 @@ namespace matrix_adaptation return true; } - void SelfAdaptation::restart(const parameters::Settings& settings) + void matrix_adaptation::SelfAdaptation::restart(const parameters::Settings& settings, const Float sigma) { A = Matrix::Identity(settings.dim, settings.dim); } @@ -414,47 +407,70 @@ namespace matrix_adaptation return A.triangularView().solve(yi); } - void NaturalGradientAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, size_t mu, size_t lambda) + void matrix_adaptation::NaturalGradientAdaptation::adapt_evolution_paths_inner( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + const parameters::Settings& settings, + size_t mu, + size_t lambda + ) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); + compute_gradients(pop, w, stats, settings, mu, lambda); } - bool NaturalGradientAdaptation::adapt_matrix( - const parameters::Weights& w, const parameters::Modules& m, - const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) + void NaturalGradientAdaptation::compute_gradients( + const Population& pop, + const parameters::Weights& w, + const parameters::Stats& stats, + const parameters::Settings& settings, + size_t mu, + size_t lambda + ) { + const size_t dim = pop.Z.rows(); - stats.last_update = stats.t; - stats.n_updates++; - static Float eta = 0.6 * (3 + std::log(settings.dim)) / std::pow(settings.dim, 1.5); - + const auto& weights = settings.modules.active ? 
w.weights.topRows(pop.Z.cols()) : w.positive; + G.setZero(); - for (int i = 0; i < w.positive.rows(); ++i) + for (int i = 0; i < weights.rows(); ++i) { const auto& z = pop.Z.col(i); - G.noalias() += w.positive(i) * (z * z.transpose() - Matrix::Identity(settings.dim, settings.dim)); + G.noalias() += weights(i) * (z * z.transpose() - Matrix::Identity(dim, dim)); } - + // Remove isotropic (sigma-related) component: make G trace-free - G.diagonal().array() -= (G.trace() / dd); + sigma_g = (G.trace() / dd); + G.diagonal().array() -= sigma_g; // Ensure symmetry for numerical stability - G = 0.5 * (G + G.transpose().eval()); + G = 0.5 * (G + G.transpose().eval()); + } - // Apply the exponential update to A - A *= ((0.5 * eta) * G).exp(); + bool NaturalGradientAdaptation::adapt_matrix( + const parameters::Weights& w, const parameters::Modules& m, + const Population& pop, size_t mu, const parameters::Settings& settings, parameters::Stats& stats) + { + + stats.last_update = stats.t; + stats.n_updates++; + // Apply the exponential update to A + A *= ((0.5 * w.cc) * G).exp(); outdated_A_inv = true; return true; } - void NaturalGradientAdaptation::restart(const parameters::Settings& settings) + void matrix_adaptation::NaturalGradientAdaptation::restart(const parameters::Settings& settings, const Float sigma) { - Adaptation::restart(settings); - A = Matrix::Identity(settings.dim, settings.dim); + Adaptation::restart(settings, sigma); + A = Matrix::Identity(settings.dim, settings.dim) / sigma; + A_inv = Matrix::Identity(settings.dim, settings.dim); G = Matrix::Zero(settings.dim, settings.dim); outdated_A_inv = false; + sigma_g = 0.; } Vector NaturalGradientAdaptation::compute_y(const Vector& zi) diff --git a/src/mutation.cpp b/src/mutation.cpp index 4fcce05..63ca610 100644 --- a/src/mutation.cpp +++ b/src/mutation.cpp @@ -32,7 +32,11 @@ namespace mutation do { p.pop.t(i) = p.stats.t; - p.pop.Z.col(i).noalias() = p.mutation->tc->scale((*p.sampler)(), p.bounds->diameter, p.settings.budget, p.stats.evaluations); + const auto& zi = (*p.sampler)(); + const auto& zi_scaled = p.mutation->tc->scale( + zi, p.bounds->diameter, p.settings.budget, p.stats.evaluations + ); + p.pop.Z.col(i).noalias() = zi; p.pop.Y.col(i).noalias() = p.adaptation->compute_y(p.pop.Z.col(i)); p.pop.X.col(i).noalias() = p.pop.Y.col(i) * p.pop.s(i) + p.adaptation->m; p.bounds->correct(i, p); @@ -145,6 +149,12 @@ namespace mutation Population& pop, const Population& old_pop, const parameters::Stats& stats, const size_t lambda) { + if (const auto dynamic = std::dynamic_pointer_cast(adaptation)) + { + sigma *= std::exp(w.cs / 2.0 * dynamic->sigma_g); + return; + } + const Float z = ((pop.Z).colwise().squaredNorm().array() - adaptation->dd).matrix() * w.clipped(); sigma *= std::exp((w.cs / std::sqrt(adaptation->dd)) * z); } diff --git a/src/parameters.cpp b/src/parameters.cpp index 0d8248e..476ad73 100644 --- a/src/parameters.cpp +++ b/src/parameters.cpp @@ -14,7 +14,9 @@ namespace parameters criteria(restart::Criteria::get(settings.modules)), adaptation(matrix_adaptation::get(settings.modules, settings.dim, settings.x0.value_or(Vector::Zero(settings.dim)), - sampler->expected_length())), + sampler->expected_length(), + settings.sigma0 + )), mutation(mutation::get(settings.modules, settings.mu0, static_cast(settings.dim), @@ -59,7 +61,7 @@ namespace parameters mutation = mutation::get(settings.modules, mu, static_cast(settings.dim), sigma.value_or(settings.sigma0)); - adaptation->restart(settings); + adaptation->restart(settings, 
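// A compact sketch of the xNES-style natural-gradient update assembled in the hunks
// above: G accumulates the weighted sum of (z z^T - I), its trace divided by d (sigma_g)
// is split off as the isotropic step-size component, and the remaining trace-free,
// symmetrised part updates A through a matrix exponential. The learning rates, weights
// and samples below are placeholders, not the values derived in weights.cpp.
#include <Eigen/Dense>
#include <unsupported/Eigen/MatrixFunctions>   // for the matrix exponential
#include <cmath>
#include <iostream>

int main()
{
    const int d = 3;
    const double eta_A = 0.05, eta_sigma = 0.1;    // placeholder learning rates
    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(d, d);
    double sigma = 2.0;

    Eigen::MatrixXd Z(d, 2);                       // two sampled z-vectors (columns)
    Z << 0.4, -1.1,
         0.9,  0.3,
        -0.2,  0.7;
    Eigen::VectorXd w(2);
    w << 0.6, 0.4;                                 // recombination weights

    Eigen::MatrixXd G = Eigen::MatrixXd::Zero(d, d);
    for (int i = 0; i < w.size(); ++i)
        G.noalias() += w(i) * (Z.col(i) * Z.col(i).transpose()
                               - Eigen::MatrixXd::Identity(d, d));

    const double sigma_g = G.trace() / d;          // isotropic (step-size) component
    G.diagonal().array() -= sigma_g;               // make G trace-free
    G = 0.5 * (G + G.transpose().eval());          // keep G symmetric

    sigma *= std::exp(0.5 * eta_sigma * sigma_g);  // step-size update driven by sigma_g
    const Eigen::MatrixXd expG = (0.5 * eta_A * G).exp();
    A *= expG;                                     // shape update via matrix exponential

    std::cout << "sigma: " << sigma << "\nA:\n" << A << "\n";
}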
sigma.value_or(settings.sigma0)); (*center_placement)(*this); criteria.reset(*this); stats.current_best = {}; @@ -67,7 +69,7 @@ namespace parameters void Parameters::adapt() { - adaptation->adapt_evolution_paths(pop, weights, stats, mu, lambda); + adaptation->adapt_evolution_paths(pop, weights, stats, settings, lambda, mu); mutation->adapt(weights, adaptation, pop, old_pop, stats, lambda); if (constants::clip_sigma) diff --git a/src/weights.cpp b/src/weights.cpp index f591eeb..807df5c 100644 --- a/src/weights.cpp +++ b/src/weights.cpp @@ -77,6 +77,7 @@ namespace parameters if (settings.modules.matrix_adaptation == MatrixAdaptationType::SEPERABLE) cmu_default *= ((d + 2.0) / 3.0); + if (settings.lambda0 == 1) { cmu_default = 2 / (pow(d, 2) + 6.0); @@ -108,6 +109,9 @@ namespace parameters beta = 1.0 / std::sqrt(2.0 * mueff); if (settings.modules.ssa == StepSizeAdaptation::LPXNES) beta = std::log(2.0) / (std::sqrt(d) * std::log(d)); + + if (settings.modules.matrix_adaptation == MatrixAdaptationType::NATURAL_GRADIENT) + cs = cc = (9.0 + 3.0 + std::log(d)) / (5.0 * d * std::sqrt(d)); } From 35d354ed213ceb0336eb4338dbcc7b7cd19b2ca1 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 5 Jun 2025 13:51:48 +0200 Subject: [PATCH 37/74] fixed repelling --- include/matrix_adaptation.hpp | 11 +++++- include/repelling.hpp | 69 ++++++++++++++++------------------- src/interface.cpp | 6 ++- src/main.cpp | 27 +++++++++----- src/matrix_adaptation.cpp | 16 ++++---- src/repelling.cpp | 30 ++------------- 6 files changed, 76 insertions(+), 83 deletions(-) diff --git a/include/matrix_adaptation.hpp b/include/matrix_adaptation.hpp index 3b0a508..255825c 100644 --- a/include/matrix_adaptation.hpp +++ b/include/matrix_adaptation.hpp @@ -46,6 +46,15 @@ namespace matrix_adaptation dz.setZero(); } + Float distance(const Vector u, const Vector& v) + { + const auto& delta = u - v; + return invert_y(delta).norm(); + } + + Float distance_from_center(const Vector& xi) { + return distance(m, xi); + } }; struct None final : Adaptation @@ -228,6 +237,7 @@ namespace matrix_adaptation Vector compute_y(const Vector&) override; Vector invert_y(const Vector&) override; + }; struct CovarainceNoEigvAdaptation final : CovarianceAdaptation @@ -286,7 +296,6 @@ namespace matrix_adaptation - inline std::shared_ptr get(const parameters::Modules& m, const size_t dim, const Vector& x0, const Float expected_z, const Float sigma0) { using namespace parameters; diff --git a/include/repelling.hpp b/include/repelling.hpp index 0003393..fd082f7 100644 --- a/include/repelling.hpp +++ b/include/repelling.hpp @@ -12,22 +12,22 @@ namespace repelling { namespace distance { - Float manhattan(const Vector &u, const Vector &v); - Float euclidian(const Vector &u, const Vector &v); - Float mahanolobis(const Vector &u, const Vector &v, const Matrix &C_inv); + Float manhattan(const Vector& u, const Vector& v); + Float euclidian(const Vector& u, const Vector& v); + Float mahanolobis(const Vector& u, const Vector& v, const Matrix& C_inv); bool hill_valley_test( - const Solution &u, - const Solution &v, - FunctionType &f, + const Solution& u, + const Solution& v, + FunctionType& f, const size_t n_evals); bool hill_valley_test_p( - const Solution &u, - const Solution &v, - FunctionType &f, + const Solution& u, + const Solution& v, + FunctionType& f, const size_t n_evals, - parameters::Parameters &p); + parameters::Parameters& p); } struct TabooPoint @@ -37,20 +37,17 @@ namespace repelling Float shrinkage; int n_rep; Float criticality; - // Matrix C; - // 
Matrix C_inv; TabooPoint( - const Solution &s, - const Float radius/*, - const Matrix& C, const Matrix& C_inv*/ ) : solution(s), - radius(radius), - shrinkage(std::pow(0.99, 1. / static_cast(s.x.size()))), - n_rep(1), - criticality(0.0) {} - /*, - C(C), - C_inv(C_inv) {}*/ + const Solution& s, + const Float radius) : + solution(s), + radius(radius), + shrinkage(std::pow(0.99, 1. / static_cast(s.x.size()))), + n_rep(1), + criticality(0.0) + {} + /** * \brief Rejection rule for a taboo point for a given xi @@ -59,11 +56,11 @@ namespace repelling * \param attempts determines the amount of shrinkage applied; radius = pow(shrinkage, attempts) * radius * \return */ - bool rejects(const Vector &xi, const parameters::Parameters &p, const int attempts) const; + bool rejects(const Vector& xi, const parameters::Parameters& p, const int attempts) const; - bool shares_basin(FunctionType &objective, const Solution &sol, parameters::Parameters &p) const; + bool shares_basin(FunctionType& objective, const Solution& sol, parameters::Parameters& p) const; - void calculate_criticality(const parameters::Parameters &p); + void calculate_criticality(const parameters::Parameters& p); }; struct Repelling @@ -71,8 +68,6 @@ namespace repelling std::vector archive; int attempts = 0; Float coverage = 20.0; - // Matrix C; - Matrix C_inv; virtual ~Repelling() = default; @@ -83,38 +78,36 @@ namespace repelling * \param p * \return */ - virtual bool is_rejected(const Vector &xi, parameters::Parameters &p); + virtual bool is_rejected(const Vector& xi, parameters::Parameters& p); /** * \brief Update the archive of points * \param p */ - virtual void update_archive(FunctionType &objective, parameters::Parameters &p); + virtual void update_archive(FunctionType& objective, parameters::Parameters& p); /** * \brief Hook before sampling starts */ - virtual void prepare_sampling(const parameters::Parameters &p); + virtual void prepare_sampling(const parameters::Parameters& p); }; struct NoRepelling final : Repelling { - bool is_rejected(const Vector &xi, parameters::Parameters &p) override + bool is_rejected(const Vector& xi, parameters::Parameters& p) override { return false; } - void update_archive(FunctionType &objective, parameters::Parameters &p) override - { - } + void update_archive(FunctionType& objective, parameters::Parameters& p) override + {} - void prepare_sampling(const parameters::Parameters &p) override - { - } + void prepare_sampling(const parameters::Parameters& p) override + {} }; - inline std::shared_ptr get(const parameters::Modules &m) + inline std::shared_ptr get(const parameters::Modules& m) { if (m.repelling_restart) return std::make_shared(); diff --git a/src/interface.cpp b/src/interface.cpp index 9f42c70..9c8b38c 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -341,12 +341,14 @@ void define_matrix_adaptation(py::module& main) py::arg("pop"), py::arg("weights"), py::arg("stats"), + py::arg("settings"), py::arg("mu"), py::arg("lamb")) .def("adapt_evolution_paths_innner", &Adaptation::adapt_evolution_paths_inner, py::arg("pop"), py::arg("weights"), py::arg("stats"), + py::arg("settings"), py::arg("mu"), py::arg("lamb")) .def("adapt_matrix", &Adaptation::adapt_matrix, @@ -356,7 +358,9 @@ void define_matrix_adaptation(py::module& main) py::arg("mu"), py::arg("settings"), py::arg("stats")) - .def("restart", &Adaptation::restart, py::arg("settings")) + .def("restart", &Adaptation::restart, py::arg("settings"), py::args("sigma")) + .def("distance", &Adaptation::distance, py::arg("u"), py::arg("v")) + 
.def("distance_from_center", &Adaptation::distance_from_center, py::arg("x")) .def("compute_y", &Adaptation::compute_y, py::arg("zi")) .def("invert_x", &Adaptation::invert_x, py::arg("xi"), py::arg("sigma")) .def("invert_y", &Adaptation::invert_y, py::arg("yi")) diff --git a/src/main.cpp b/src/main.cpp index 4ce7fec..1cbeab0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -65,7 +65,16 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc parameters::Modules m; m.matrix_adaptation = mat_t; m.ssa = ssa; - m.active = true; + m.active = false; + m.sampler = parameters::BaseSampler::HALTON; + m.restart_strategy = parameters::RestartStrategyType::STOP; + m.sample_transformation = parameters::SampleTranformerType::CAUCHY; + m.elitist = true; + m.sequential_selection = true; + m.threshold_convergence = true; + m.weights = parameters::RecombinationWeights::EQUAL; + m.repelling_restart = true; + /*m.elitist = false; m.active = false; @@ -101,8 +110,8 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc std::nullopt, budget, 2.0, - 43, - 13 + 27, + 17 ); auto p = std::make_shared(settings); auto cma = ModularCMAES(p); @@ -112,10 +121,10 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc while (cma.step(f)) { - //std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; - //std::cout << "iters: " << cma.p->stats.t << ": "; - //std::cout << "sigma: " << cma.p->mutation->sigma << ": "; - //std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; + std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; + std::cout << "iters: " << cma.p->stats.t << ": "; + std::cout << "sigma: " << cma.p->mutation->sigma << ": "; + std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; if (cma.p->stats.global_best.y < 1e-9) break; @@ -143,7 +152,7 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { auto ft = functions::SPHERE; - auto ssa = parameters::StepSizeAdaptation::TPA; + auto ssa = parameters::StepSizeAdaptation::MSR; //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::SEPERABLE, ft, ssa); @@ -152,7 +161,7 @@ int main() //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); + //run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); } \ No newline at end of file diff --git a/src/matrix_adaptation.cpp b/src/matrix_adaptation.cpp index 842ff28..30bab05 100644 --- a/src/matrix_adaptation.cpp +++ b/src/matrix_adaptation.cpp @@ -201,7 +201,7 @@ namespace matrix_adaptation - void matrix_adaptation::MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + void MatrixAdaptation::adapt_evolution_paths_inner(const Population& pop, const Weights& w, const Stats& stats, const parameters::Settings& settings, const size_t mu, const size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); @@ -245,7 +245,7 @@ namespace matrix_adaptation return true; } - void matrix_adaptation::MatrixAdaptation::restart(const Settings& settings, const Float sigma) + void MatrixAdaptation::restart(const Settings& settings, const Float sigma) { Adaptation::restart(settings, sigma); M = Matrix::Identity(settings.dim, 
settings.dim); @@ -268,7 +268,7 @@ namespace matrix_adaptation } - void matrix_adaptation::None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, + void None::adapt_evolution_paths_inner(const Population& pop, const Weights& w, const Stats& stats, const parameters::Settings& settings, const size_t mu, const size_t lambda) { ps = (1.0 - w.cs) * ps + (w.sqrt_cs_mueff * dz); @@ -285,7 +285,7 @@ namespace matrix_adaptation return yi; } - void matrix_adaptation::CholeskyAdaptation::adapt_evolution_paths_inner(const Population& pop, + void CholeskyAdaptation::adapt_evolution_paths_inner(const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, const parameters::Settings& settings, size_t mu, size_t lambda) { @@ -314,7 +314,7 @@ namespace matrix_adaptation return true; } - void matrix_adaptation::CholeskyAdaptation::restart(const parameters::Settings& settings, const Float sigma) + void CholeskyAdaptation::restart(const parameters::Settings& settings, const Float sigma) { Adaptation::restart(settings, sigma); A = Matrix::Identity(settings.dim, settings.dim); @@ -364,7 +364,7 @@ namespace matrix_adaptation return true; } - void matrix_adaptation::SelfAdaptation::restart(const parameters::Settings& settings, const Float sigma) + void SelfAdaptation::restart(const parameters::Settings& settings, const Float sigma) { A = Matrix::Identity(settings.dim, settings.dim); } @@ -407,7 +407,7 @@ namespace matrix_adaptation return A.triangularView().solve(yi); } - void matrix_adaptation::NaturalGradientAdaptation::adapt_evolution_paths_inner( + void NaturalGradientAdaptation::adapt_evolution_paths_inner( const Population& pop, const parameters::Weights& w, const parameters::Stats& stats, @@ -481,7 +481,7 @@ namespace matrix_adaptation Vector NaturalGradientAdaptation::invert_y(const Vector& yi) { if (outdated_A_inv) - A_inv = A.inverse(); + A_inv = A.completeOrthogonalDecomposition().pseudoInverse(); return A_inv * yi; } diff --git a/src/repelling.cpp b/src/repelling.cpp index e10700a..a703133 100644 --- a/src/repelling.cpp +++ b/src/repelling.cpp @@ -66,8 +66,8 @@ namespace repelling bool TabooPoint::rejects(const Vector &xi, const parameters::Parameters &p, const int attempts) const { const Float rejection_radius = std::pow(shrinkage, attempts) * radius; - const Float delta_xi = distance::mahanolobis(xi, solution.x, p.repelling->C_inv) / p.mutation->sigma; - + const Float delta_xi = p.adaptation->distance(xi, solution.x) / p.mutation->sigma; + if (delta_xi < rejection_radius) return true; @@ -81,7 +81,8 @@ namespace repelling void TabooPoint::calculate_criticality(const parameters::Parameters &p) { - const Float delta_m = distance::mahanolobis(p.adaptation->m, solution.x, p.repelling->C_inv) / p.mutation->sigma; + const Float delta_m = p.adaptation->distance_from_center(solution.x) / p.mutation->sigma; + const auto u = delta_m + radius; const auto l = delta_m - radius; criticality = cdf(u) - cdf(l); @@ -95,29 +96,6 @@ namespace repelling std::sort(archive.begin(), archive.end(), [](const TabooPoint &a, const TabooPoint &b) { return a.criticality > b.criticality; }); - - //! 
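// A minimal sketch of the taboo-point rejection test after the repelling fix above:
// distances are now measured through the adaptation object (y = A^{-1}(u - v), i.e. in
// the sampler's coordinate system) rather than a separately tracked C^{-1}, scaled by
// the current sigma, and compared against a radius that shrinks with every rejected
// attempt. A_inv, sigma, the radius and the points below are placeholder values.
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

bool rejects(const Eigen::VectorXd& xi, const Eigen::VectorXd& taboo,
             const Eigen::MatrixXd& A_inv, const double sigma,
             const double radius, const double shrinkage, const int attempts)
{
    const double delta_xi = (A_inv * (xi - taboo)).norm() / sigma;  // transformed distance
    const double rejection_radius = std::pow(shrinkage, attempts) * radius;
    return delta_xi < rejection_radius;
}

int main()
{
    const int d = 2;
    const Eigen::MatrixXd A_inv = Eigen::MatrixXd::Identity(d, d);
    const Eigen::VectorXd xi = Eigen::VectorXd::Constant(d, 0.1);
    const Eigen::VectorXd taboo = Eigen::VectorXd::Zero(d);
    const double shrinkage = std::pow(0.99, 1.0 / d);   // illustrative per-dimension shrinkage factor
    std::cout << std::boolalpha
              << rejects(xi, taboo, A_inv, 1.0, 0.5, shrinkage, 3) << "\n";
}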
If it is not intialized - if (static_cast(C_inv.cols()) != p.settings.dim) - { - C_inv = Matrix::Identity(p.settings.dim, p.settings.dim); - } - - if (!(p.settings.modules.matrix_adaptation == parameters::MatrixAdaptationType::NONE || - p.settings.modules.matrix_adaptation == parameters::MatrixAdaptationType::MATRIX)) - { - using namespace matrix_adaptation; - const auto dynamic = std::dynamic_pointer_cast(p.adaptation); - - C_inv.noalias() = (dynamic->B * dynamic->d.cwiseInverse().asDiagonal()) * dynamic->B.transpose(); - - /*const Float d_sigma = p.mutation->sigma / p.settings.sigma0; - if (d_sigma > constants::sigma_threshold) - { - C = dynamic->C / dynamic->C.maxCoeff(); - C_inv = dynamic->inv_C / dynamic->inv_C.maxCoeff(); - }*/ - } - } void Repelling::update_archive(FunctionType &objective, parameters::Parameters &p) From 35443e0784f5c7b9f45d5a6cac121f8b08c61d32 Mon Sep 17 00:00:00 2001 From: Jacob de Nobel Date: Thu, 5 Jun 2025 14:13:13 +0200 Subject: [PATCH 38/74] increase shrinkage add criteria too much repelling, fix interface --- include/repelling.hpp | 2 +- include/restart_criteria.hpp | 334 ++++++++++++++++++----------------- src/interface.cpp | 125 +++++++------ src/main.cpp | 43 +---- src/restart_criteria.cpp | 11 ++ 5 files changed, 260 insertions(+), 255 deletions(-) diff --git a/include/repelling.hpp b/include/repelling.hpp index fd082f7..7d1ba8f 100644 --- a/include/repelling.hpp +++ b/include/repelling.hpp @@ -43,7 +43,7 @@ namespace repelling const Float radius) : solution(s), radius(radius), - shrinkage(std::pow(0.99, 1. / static_cast(s.x.size()))), + shrinkage(std::pow(0.95, 1. / static_cast(s.x.size()))), n_rep(1), criticality(0.0) {} diff --git a/include/restart_criteria.hpp b/include/restart_criteria.hpp index a0b0f77..8b2f00e 100644 --- a/include/restart_criteria.hpp +++ b/include/restart_criteria.hpp @@ -5,171 +5,179 @@ namespace parameters { - struct Parameters; + struct Parameters; } namespace restart { - struct Criterion - { - bool met; - std::string name; - size_t last_restart; - - Criterion(const std::string &name) : met(false), name(name), last_restart(0) {} - - virtual ~Criterion() = default; - - void reset(const parameters::Parameters &p); - - virtual void update(const parameters::Parameters &p) = 0; - - virtual void on_reset(const parameters::Parameters &p) {}; - }; - - using vCriteria = std::vector>; - - struct Criteria - { - Criteria(const vCriteria &c) : items(c) {} - - void update(const parameters::Parameters &p) - { - for (auto &c : items) - { - c->update(p); - } - } - - void reset(const parameters::Parameters &p) - { - for (auto &c : items) - c->reset(p); - } - - bool any() const - { - for (const auto &c : items) - if (c->met) - return true; - return false; - } - - std::string reason() const - { - std::string res; - for (const auto& c : items) - if (c->met) - res += c->name + "; "; - return res; - } - - vCriteria items; - - static Criteria get(const parameters::Modules modules); - }; - - struct ExceededMaxIter : Criterion - { - size_t max_iter; - ExceededMaxIter() : Criterion("ExceededMaxIter") {} - void update(const parameters::Parameters &p) override; - void on_reset(const parameters::Parameters &p) override; - }; - - struct NoImprovement : Criterion - { - size_t n_bin; - std::vector best_fitnesses; - NoImprovement() : Criterion("NoImprovement") {} - void update(const parameters::Parameters &p) override; - void on_reset(const parameters::Parameters &p) override; - }; - - struct MaxSigma : Criterion - { - static inline Float tolerance = 1e4; - 
MaxSigma() : Criterion("MaxSigma") {} - void update(const parameters::Parameters &p) override; - }; - - struct MinSigma : Criterion - { - static inline Float tolerance = 1e-20; - MinSigma() : Criterion("MinSigma") {} - void update(const parameters::Parameters &p) override; - }; - - struct UnableToAdapt : Criterion - { - UnableToAdapt() : Criterion("UnableToAdapt") {} - void update(const parameters::Parameters &p) override; - }; - - struct FlatFitness : Criterion - { - size_t max_flat_fitness; - size_t flat_fitness_index; - Eigen::Array flat_fitnesses; - - FlatFitness() : Criterion("FlatFitness") {} - void update(const parameters::Parameters &p) override; - void on_reset(const parameters::Parameters &p) override; - }; - - struct TolX : Criterion - { - static inline Float tolerance = 10e-12; - Vector tolx_vector; - TolX() : Criterion("TolX") {} - void update(const parameters::Parameters &p) override; - void on_reset(const parameters::Parameters &p) override; - }; - - struct MaxDSigma : Criterion - { - static inline Float tolerance = std::pow(10., 20.); - MaxDSigma() : Criterion("MaxDSigma") {} - void update(const parameters::Parameters &p) override; - }; - - struct MinDSigma : Criterion - { - static inline Float tolerance = 1e-8; - MinDSigma() : Criterion("MinDSigma") {} - void update(const parameters::Parameters &p) override; - }; - - struct ConditionC : Criterion - { - static inline Float tolerance = std::pow(10., 14.); - ConditionC() : Criterion("ConditionC") {} - void update(const parameters::Parameters &p) override; - }; - - struct NoEffectAxis : Criterion - { - static inline Float tolerance = 0.; - NoEffectAxis() : Criterion("NoEffectAxis") {} - void update(const parameters::Parameters &p) override; - }; - - struct NoEffectCoord : Criterion - { - static inline Float tolerance = 0.; - NoEffectCoord() : Criterion("NoEffectCoord") {} - void update(const parameters::Parameters &p) override; - }; - - struct Stagnation : Criterion - { - static inline Float tolerance = 0.3; - - size_t n_stagnation; - std::vector median_fitnesses; - std::vector best_fitnesses; - Stagnation() : Criterion("Stagnation") {} - void update(const parameters::Parameters &p) override; - void on_reset(const parameters::Parameters &p) override; - }; + struct Criterion + { + bool met; + std::string name; + size_t last_restart; + + Criterion(const std::string& name) : met(false), name(name), last_restart(0) {} + + virtual ~Criterion() = default; + + void reset(const parameters::Parameters& p); + + virtual void update(const parameters::Parameters& p) = 0; + + virtual void on_reset(const parameters::Parameters& p) {}; + }; + + using vCriteria = std::vector>; + + struct Criteria + { + Criteria(const vCriteria& c) : items(c) {} + + void update(const parameters::Parameters& p) + { + for (auto& c : items) + { + c->update(p); + } + } + + void reset(const parameters::Parameters& p) + { + for (auto& c : items) + c->reset(p); + } + + bool any() const + { + for (const auto& c : items) + if (c->met) + return true; + return false; + } + + std::string reason() const + { + std::string res; + for (const auto& c : items) + if (c->met) + res += c->name + "; "; + return res; + } + + vCriteria items; + + static Criteria get(const parameters::Modules modules); + }; + + struct ExceededMaxIter : Criterion + { + size_t max_iter; + ExceededMaxIter() : Criterion("ExceededMaxIter") {} + void update(const parameters::Parameters& p) override; + void on_reset(const parameters::Parameters& p) override; + }; + + struct NoImprovement : Criterion + { + 
size_t n_bin; + std::vector best_fitnesses; + NoImprovement() : Criterion("NoImprovement") {} + void update(const parameters::Parameters& p) override; + void on_reset(const parameters::Parameters& p) override; + }; + + struct MaxSigma : Criterion + { + static inline Float tolerance = 1e4; + MaxSigma() : Criterion("MaxSigma") {} + void update(const parameters::Parameters& p) override; + }; + + struct MinSigma : Criterion + { + static inline Float tolerance = 1e-20; + MinSigma() : Criterion("MinSigma") {} + void update(const parameters::Parameters& p) override; + }; + + struct UnableToAdapt : Criterion + { + UnableToAdapt() : Criterion("UnableToAdapt") {} + void update(const parameters::Parameters& p) override; + }; + + struct FlatFitness : Criterion + { + size_t max_flat_fitness; + size_t flat_fitness_index; + Eigen::Array flat_fitnesses; + + FlatFitness() : Criterion("FlatFitness") {} + void update(const parameters::Parameters& p) override; + void on_reset(const parameters::Parameters& p) override; + }; + + struct TolX : Criterion + { + static inline Float tolerance = 10e-12; + Vector tolx_vector; + TolX() : Criterion("TolX") {} + void update(const parameters::Parameters& p) override; + void on_reset(const parameters::Parameters& p) override; + }; + + struct MaxDSigma : Criterion + { + static inline Float tolerance = std::pow(10., 20.); + MaxDSigma() : Criterion("MaxDSigma") {} + void update(const parameters::Parameters& p) override; + }; + + struct MinDSigma : Criterion + { + static inline Float tolerance = 1e-8; + MinDSigma() : Criterion("MinDSigma") {} + void update(const parameters::Parameters& p) override; + }; + + struct ConditionC : Criterion + { + static inline Float tolerance = std::pow(10., 14.); + ConditionC() : Criterion("ConditionC") {} + void update(const parameters::Parameters& p) override; + }; + + struct NoEffectAxis : Criterion + { + static inline Float tolerance = 0.; + NoEffectAxis() : Criterion("NoEffectAxis") {} + void update(const parameters::Parameters& p) override; + }; + + struct NoEffectCoord : Criterion + { + static inline Float tolerance = 0.; + NoEffectCoord() : Criterion("NoEffectCoord") {} + void update(const parameters::Parameters& p) override; + }; + + struct Stagnation : Criterion + { + static inline Float tolerance = 0.3; + + size_t n_stagnation; + std::vector median_fitnesses; + std::vector best_fitnesses; + Stagnation() : Criterion("Stagnation") {} + void update(const parameters::Parameters& p) override; + void on_reset(const parameters::Parameters& p) override; + }; + + struct TooMuchRepelling : Criterion + { + static inline Float tolerance = 50; + TooMuchRepelling() : Criterion("TooMuchRepelling") {} + void update(const parameters::Parameters& p) override; + }; + } \ No newline at end of file diff --git a/src/interface.cpp b/src/interface.cpp index 9c8b38c..03cbf1f 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -300,7 +300,7 @@ void define_repelling(py::module& main) .def_readwrite("solution", &TabooPoint::solution) .def_readwrite("shrinkage", &TabooPoint::shrinkage) .def_readwrite("criticality", &TabooPoint::criticality) - .def("__repr__", [](TabooPoint& tb) { + .def("__repr__", [] (TabooPoint& tb) { return ""; }); @@ -364,7 +364,7 @@ void define_matrix_adaptation(py::module& main) .def("compute_y", &Adaptation::compute_y, py::arg("zi")) .def("invert_x", &Adaptation::invert_x, py::arg("xi"), py::arg("sigma")) .def("invert_y", &Adaptation::invert_y, py::arg("yi")) - .def("__repr__", [](Adaptation& dyn) + .def("__repr__", [] (Adaptation& 
dyn) { std::stringstream ss; ss << std::boolalpha; @@ -378,10 +378,10 @@ void define_matrix_adaptation(py::module& main) ss << ">"; return ss.str(); }); - + py::class_>(m, "NoAdaptation") .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) - .def("__repr__", [](None& dyn) + .def("__repr__", [] (None& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -393,7 +393,7 @@ void define_matrix_adaptation(py::module& main) ss << " dd: " << dyn.dd; ss << " expected_length_z: " << dyn.expected_length_z; ss << ">"; - return ss.str(); }); + return ss.str(); }); py::class_>(m, "CovarianceAdaptation") @@ -412,7 +412,7 @@ void define_matrix_adaptation(py::module& main) py::arg("mu")) .def("perform_eigendecomposition", &CovarianceAdaptation::perform_eigendecomposition, py::arg("stats")) .def("adapt_ps", &CovarianceAdaptation::adapt_ps, py::arg("weights")) - .def("__repr__", [](CovarianceAdaptation& dyn) + .def("__repr__", [] (CovarianceAdaptation& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -437,7 +437,7 @@ void define_matrix_adaptation(py::module& main) .def_readwrite("c", &SeperableAdaptation::c) .def_readwrite("pc", &SeperableAdaptation::pc) .def_readwrite("d", &SeperableAdaptation::d) - .def("__repr__", [](SeperableAdaptation& dyn) + .def("__repr__", [] (SeperableAdaptation& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -455,7 +455,7 @@ void define_matrix_adaptation(py::module& main) return ss.str(); }); py::class_>(m, "OnePlusOneAdaptation") - .def("__repr__", [](OnePlusOneAdaptation& dyn) + .def("__repr__", [] (OnePlusOneAdaptation& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -479,7 +479,7 @@ void define_matrix_adaptation(py::module& main) .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) .def_readwrite("M", &MatrixAdaptation::M) .def_readwrite("M_inv", &MatrixAdaptation::M_inv) - .def("__repr__", [](MatrixAdaptation& dyn) + .def("__repr__", [] (MatrixAdaptation& dyn) { std::stringstream ss; ss << std::boolalpha; @@ -508,11 +508,19 @@ void define_matrix_adaptation(py::module& main) ; py::class_>(m, "NaturalGradientAdaptation") - .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z")) + .def(py::init(), py::arg("dimension"), py::arg("x0"), py::arg("expected_length_z"), py::arg("sigma")) .def_readwrite("A", &NaturalGradientAdaptation::A) .def_readwrite("A_inv", &NaturalGradientAdaptation::A_inv) - .def_readwrite("G", &NaturalGradientAdaptation::G); - + .def_readwrite("G", &NaturalGradientAdaptation::G) + .def_readwrite("sigma_g", &NaturalGradientAdaptation::sigma_g) + .def("compute_gradients", &NaturalGradientAdaptation::compute_gradients, py::arg("pop"), + py::arg("weights"), + py::arg("stats"), + py::arg("settings"), + py::arg("mu"), + py::arg("lamb") + ) + ; } void define_parameters(py::module& main) @@ -538,7 +546,7 @@ void define_parameters(py::module& main) .def_readwrite("matrix_adaptation", &Modules::matrix_adaptation) .def_readwrite("center_placement", &Modules::center_placement) .def_readwrite("sample_transformation", &Modules::sample_transformation) - .def("__repr__", [](Modules& mod) + .def("__repr__", [] (Modules& mod) { return to_string(mod); }); py::class_(m, "Solution") @@ -562,7 +570,7 @@ void define_parameters(py::module& main) .def_readwrite("success_ratio", &Stats::success_ratio) .def_readwrite("last_update", &Stats::last_update) .def_readwrite("n_updates", &Stats::n_updates) - .def("__repr__", [](Stats& stats) + .def("__repr__", [] (Stats& stats) { 
std::stringstream ss; ss << std::boolalpha; @@ -582,7 +590,7 @@ void define_parameters(py::module& main) py::arg("lambda0"), py::arg("modules"), py::arg("expected_length_z") - ) + ) .def_readwrite("mueff", &Weights::mueff) .def_readwrite("mueff_neg", &Weights::mueff_neg) .def_readwrite("c1", &Weights::c1) @@ -599,7 +607,7 @@ void define_parameters(py::module& main) .def_readwrite("weights", &Weights::weights) .def_readwrite("positive", &Weights::positive) .def_readwrite("negative", &Weights::negative) - .def("__repr__", [](Weights& weights) + .def("__repr__", [] (Weights& weights) { std::stringstream ss; ss << std::boolalpha; @@ -656,7 +664,7 @@ void define_parameters(py::module& main) .def_readwrite("c1", &Settings::c1) .def_readwrite("verbose", &Settings::verbose) .def_readonly("volume", &Settings::volume) - .def("__repr__", [](Settings& settings) + .def("__repr__", [] (Settings& settings) { std::stringstream ss; ss << std::boolalpha; @@ -692,7 +700,7 @@ void define_parameters(py::module& main) std::shared_ptr, std::shared_ptr, std::shared_ptr - >; + >; py::class_>(main, "Parameters") .def(py::init(), py::arg("dimension")) @@ -706,32 +714,32 @@ void define_parameters(py::module& main) .def_readwrite("lamb", &Parameters::lambda) .def_property( "adaptation", - [](Parameters& self) -> AdaptationType + [] (Parameters& self) -> AdaptationType { switch (self.settings.modules.matrix_adaptation) { - case MatrixAdaptationType::MATRIX: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::NONE: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::SEPERABLE: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::ONEPLUSONE: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::CHOLESKY: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::CMSA: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::COVARIANCE_NO_EIGV: - return std::dynamic_pointer_cast(self.adaptation); - case MatrixAdaptationType::NATURAL_GRADIENT: - return std::dynamic_pointer_cast(self.adaptation); - default: - case MatrixAdaptationType::COVARIANCE: - return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::MATRIX: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::NONE: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::SEPERABLE: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::ONEPLUSONE: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::CHOLESKY: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::CMSA: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::COVARIANCE_NO_EIGV: + return std::dynamic_pointer_cast(self.adaptation); + case MatrixAdaptationType::NATURAL_GRADIENT: + return std::dynamic_pointer_cast(self.adaptation); + default: + case MatrixAdaptationType::COVARIANCE: + return std::dynamic_pointer_cast(self.adaptation); } }, - [](Parameters& self, std::shared_ptr adaptation) + [] (Parameters& self, std::shared_ptr adaptation) { self.adaptation = adaptation; }) @@ -827,10 +835,10 @@ void define_mutation(py::module& main) py::class_>(m, "Strategy") .def( py::init< - std::shared_ptr, - std::shared_ptr, - std::shared_ptr, - Float + std::shared_ptr, + std::shared_ptr, + std::shared_ptr, + Float >(), py::arg("threshold_convergence"), 
py::arg("sequential_selection"), @@ -851,7 +859,7 @@ void define_mutation(py::module& main) .def_readwrite("sigma_sampler", &Strategy::ss) .def_readwrite("sigma", &Strategy::sigma) .def_readwrite("s", &Strategy::s) - ; + ; py::class_>(m, "CSA"); py::class_>(m, "TPA") @@ -893,41 +901,40 @@ void define_population(py::module& main) } class constants_w -{ -}; +{}; void define_constants(py::module& m) { py::class_(m, "constants") .def_property_static( "cache_max_doubles", - [](py::object) + [] (py::object) { return constants::cache_max_doubles; }, - [](py::object, size_t a) + [] (py::object, size_t a) { constants::cache_max_doubles = a; }) .def_property_static( "cache_min_samples", - [](py::object) + [] (py::object) { return constants::cache_min_samples; }, - [](py::object, size_t a) + [] (py::object, size_t a) { constants::cache_min_samples = a; }) .def_property_static( "cache_samples", - [](py::object) + [] (py::object) { return constants::cache_samples; }, - [](py::object, bool a) + [] (py::object, bool a) { constants::cache_samples = a; }) .def_property_static( "clip_sigma", - [](py::object) + [] (py::object) { return constants::clip_sigma; }, - [](py::object, bool a) + [] (py::object, bool a) { constants::clip_sigma = a; }) .def_property_static( "use_box_muller", - [](py::object) + [] (py::object) { return constants::use_box_muller; }, - [](py::object, bool a) + [] (py::object, bool a) { constants::use_box_muller = a; }) ; } @@ -960,7 +967,7 @@ void define_restart_criteria(py::module& main) .def_readwrite("met", &Criterion::met) .def_readwrite("name", &Criterion::name) .def_readwrite("last_restart", &Criterion::last_restart) - .def("__repr__", [](Criterion& self) + .def("__repr__", [] (Criterion& self) { return "<" + self.name + " met: " + std::to_string(self.met) + ">"; }); ; @@ -1031,6 +1038,10 @@ void define_restart_criteria(py::module& main) .def("reason", &Criteria::reason) .def("any", &Criteria::any); + py::class_>(m, "TooMuchRepelling") + .def(py::init<>()) + .def_readwrite_static("tolerance", &TooMuchRepelling::tolerance); + } void define_restart_strategy(py::module& main) diff --git a/src/main.cpp b/src/main.cpp index 1cbeab0..745ce07 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,6 +11,7 @@ using std::chrono::milliseconds; static int dim = 5; static bool rotated = false; +static functions::ObjectiveFunction fun_t = functions::ObjectiveFunction::SPHERE; static size_t budget = dim * 100000; @@ -59,7 +60,7 @@ struct Timer }; -void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunction fun_t, parameters::StepSizeAdaptation ssa) +void run_modcma(parameters::MatrixAdaptationType mat_t, parameters::StepSizeAdaptation ssa) { rng::set_seed(412); parameters::Modules m; @@ -67,42 +68,14 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc m.ssa = ssa; m.active = false; m.sampler = parameters::BaseSampler::HALTON; - m.restart_strategy = parameters::RestartStrategyType::STOP; + m.restart_strategy = parameters::RestartStrategyType::RESTART; m.sample_transformation = parameters::SampleTranformerType::CAUCHY; - m.elitist = true; + m.elitist = false; m.sequential_selection = true; m.threshold_convergence = true; m.weights = parameters::RecombinationWeights::EQUAL; m.repelling_restart = true; - - /*m.elitist = false; - m.active = false; - m.sampler = parameters::BaseSampler::SOBOL; - m.bound_correction = parameters::CorrectionMethod::SATURATE; - m.restart_strategy = parameters::RestartStrategyType::RESTART; - m.orthogonal = true; - 
m.sample_transformation = parameters::SampleTranformerType::CAUCHY; - m.sequential_selection = true; - m.threshold_convergence = true; - m.weights = parameters::RecombinationWeights::HALF_POWER_LAMBDA;*/ - - //m.elitist = true; - //m.active = true; - //m.sampler = parameters::BaseSampler::UNIFORM; - //m.bound_correction = parameters::CorrectionMethod::SATURATE; - //m.restart_strategy = parameters::RestartStrategyType::STOP; - //m.sample_transformation = parameters::SampleTranformerType::SCALED_UNIFORM; - //m.orthogonal = true; - //m.sequential_selection = true; - //m.weights = parameters::RecombinationWeights::EQUAL; - //m.mirrored = parameters::Mirror::PAIRWISE; - /* - "c1" : 0.1659166344465, - "cc" : 0.2310662705758, - "cmu" : 0.2024033500315, - "cs" : 0.2789599486741, - */ parameters::Settings settings( dim, m, @@ -124,7 +97,10 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc std::cout << "evals: " << cma.p->stats.evaluations << "/" << budget << ": "; std::cout << "iters: " << cma.p->stats.t << ": "; std::cout << "sigma: " << cma.p->mutation->sigma << ": "; - std::cout << "best_y: " << cma.p->stats.global_best.y << std::endl; + std::cout << "best_y: " << cma.p->stats.global_best.y; + std::cout << " n_resamples: " << cma.p->repelling->attempts; + + std::cout << std::endl; if (cma.p->stats.global_best.y < 1e-9) break; @@ -151,7 +127,6 @@ void run_modcma(parameters::MatrixAdaptationType mat_t, functions::ObjectiveFunc int main() { - auto ft = functions::SPHERE; auto ssa = parameters::StepSizeAdaptation::MSR; //run_modcma(parameters::MatrixAdaptationType::NONE, ft, ssa); @@ -163,5 +138,5 @@ int main() //run_modcma(parameters::MatrixAdaptationType::CHOLESKY, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE, ft, ssa); //run_modcma(parameters::MatrixAdaptationType::COVARIANCE_NO_EIGV, ft, ssa); - run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ft, ssa); + run_modcma(parameters::MatrixAdaptationType::NATURAL_GRADIENT, ssa); } \ No newline at end of file diff --git a/src/restart_criteria.cpp b/src/restart_criteria.cpp index 376869d..861a28a 100644 --- a/src/restart_criteria.cpp +++ b/src/restart_criteria.cpp @@ -206,6 +206,12 @@ namespace restart best_fitnesses = {}; } + void TooMuchRepelling::update(const parameters::Parameters& p) + { + const Float average_repelling = static_cast(p.repelling->attempts) / static_cast(p.lambda); + met = average_repelling >= tolerance; + } + Criteria Criteria::get(const parameters::Modules modules) { vCriteria criteria{ @@ -231,6 +237,11 @@ namespace restart criteria.push_back(std::make_shared()); } } + + if (modules.repelling_restart) + { + criteria.push_back(std::make_shared()); + } return Criteria(criteria); } } \ No newline at end of file From 788f7befe7f5bdf12e3a13b2de6e842c8101bea2 Mon Sep 17 00:00:00 2001 From: jacobdenobel Date: Thu, 5 Jun 2025 14:19:43 +0200 Subject: [PATCH 39/74] fix interface --- perf.data | Bin 0 -> 44513228 bytes src/interface.cpp | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 perf.data diff --git a/perf.data b/perf.data new file mode 100644 index 0000000000000000000000000000000000000000..7da2c82be766e683fc9a1b43b196c6bd8001902e GIT binary patch literal 44513228 zcmb^41z1&0*Eeu5Q0!Lh4y;=g6%`dl!9Wa53@lJoQ0zt&mD|Eb#m4ULM6m+}6}!6| zi|;-Ud%f5B?{AG~pV`OjdR`uVpZT$7&6?S>XYT{e8?qm9Gz!uMd^4kCd;Em9I~fuTPb)&y=svm9H<9uZG?PD_u(U z-`8cClmFtU{QnLAX82PqM~>wGV&VTwb3r+T9 z^2zz#=LdnGKG?E;+6zO#A28Cg{`#*Y!2dZ!-Z#{nPvTaXZ<#WT1wYHcWM5rx)Xr4B 
[... base85-encoded GIT binary patch data for perf.data omitted ...]
zSNu;NlRTFswTC|cQrhjR^L={_#&hPI%Ij!9;*l+X+)y0V@BU5f=P3IXP`{L^`SqhX z_4U!wqS((bVpoIL>I#3{P+ZJ0yxw}X3deP0yGZct!}uP>vEKVLUMvgac$qXb3cNhl ze2?O2yih-sQynKBJl+Q0n>322K2A`a;&jl_{vEBGmqT`evz_Ioo*xt!{-k%(cu?m@ z*E!@~JhTVgn{1y$ogc-~II(?O9M-8{Sr35oCWK!%io?9zbR9clzfLZ52)xC+`5wg? z_G=2Ahb}ya)wEx~|4}>rCLaawnWZw0>2sG5{fmG9qd404EWdV^GVj?NQ2UR~#d$I& z=Lzsv)#1+*ic`1$t!i9f=zAKx(w=;e;tboLQs<^?sXEVlS?N4@C+71#ic^2iF-C{; zB-Nse;9XkG_b5)CUsqLszx{CqJdd+{kK)u`s=2ry=r!Ovcn@^S{o3k%9g4&LaMktC zisuK9o+p5Jq5}W^Z;GRFk}wO$Nse5Pz-icx-%g6dyqr=;s`e3fp8FTP8Z{IzgbKv= z5h%|8N57=;oLJpA=`o5u5AD!5>3L`qzobXoNGu*bw$<{VcZqtf+EYFE?=<+<%i<tIfX8p2xAe+vZd1Z`+)C7nR2Z z_Ile(%5#7A7?oLRm+fWc>rw16kv-a8VXvphf0XBO9!fhB+1D-WuzbRl?}vIS-;at{ zo+q-$@OkX{HTHO2d9=NuJnHJQ_OsW8v|z7ej~;I9_3U+)owg;_D~^3V>X!2TxCJbZ zJ@>z@Jda|JiR{t#4tpJYw7si5_h*kDi`er77Eh1t>$=6t*WK7-ID1TFk1k7;?}xI- z1omjVRQY-cdyHd`R?C#H`?JRw_GlTdeBFaRS{73FbNufqUr%I@3H6lvxztx4b<36S zN8M+yV~_q1*w@*kZRagXb)=0^SHuzDIGmPP*x$HlbI3<4^Ebx8!>ir`}JX^G7#B22#Pg>v_0ob@KA!(M7{yWDUaO9y_pf9Crylz}O?}*>ILymUmntjv z>*v8)!0VfiUpIK@q7GoY8zi@4u; z(D*48nK!8pbRDO(@7ka5>%yb#liC-e&;IwnaOHxN+IMNV(!Q20j(*pF+pO)Y_`3vy z^mXX(@8jp?>p7^hgS)@=KwE3u?me6)7Kb5#n!{-5T>6*e!X&MsUS@-fG}|1&Ro_UJ$G_YSe2wXK7#t-Z64oxUlK6>a}C z=TE+Owis)Vj_6xdn(tTF!%pmdjJ12DwuRU957lQuL z&ja4|_J(4`l3qvipwn6XE0uAoe-vGByRiEuH}zHK70t6*s{06Su9gJ2K ze^32C#p|GJSsUl=q`76m`(mYd>irdpTl;#0(r+$K;5JveJ1SKGcTp+6OL2ePOr8d= zn{K_T{lgAb1h-Qpzx@=Kg#Bs1o6cq~wtwe5JMel7?ysUab^p-&&^owxfCG3dcJS9h zio^U|bzfEY_qJ(J1-!OpmGcnwJ{83&{rul{p6p!}objvp=QE;q)==Hgn>nx+corY| z9>r1Jl)EpT{;U&$>^}E<#~LJgRL9LnmFIMxppF~N;#R(I1aTSIb5Yz77Wd*oV~G2Q zJr~6VvA7E>n_wJ!E{Yq<;^seZ3UR5}b5Yzd7WXE+8N|h~=c2gbEH3-N<`8#xfbv`v zH-g3azG?w+-(M=vMR6lp+)K+=5SN!d7sZWYaUS0=?)z8exhO7}#kGCd8s6{c1LZlz z(L8X;g8T4m8@El0qxJdleEztlIQ?0O{(t0EyMbQrjh(;?>A|1x6ldk-W7XTw%1-fI zbvg6kep|X|5Ab@YQ9Sj0r#Q+hwI$vkocPcioI*48ul(=*!4!w>bV|Kjwcl2vR$uTk zvExoilKT7q*nS(u+1787bp8~Yt7+1sE!$Vp6>Y9OXOAISmDeS**I7+guJ=Rj*#1v; zwqMntSyG&T6Xo?WmMpHE@^z1F%5&SM%42wT<nFq%KJv;R=#grMcEhhZ>2nsVvnJD zmFJ1A+3VV{=j?qF*>l^r%Ga&(D~}l4}6-3ltt6PRyVTzQ{x_VuWItX-X19DDR~$W>V;p;KRYG|l!yXe$DbM}Al*cIc=-*v=9$G-jQ&)6XQhh_pC||c+W0h3@IQHDO z5gR|_mG4`XRUSjiDD6)4R-Q+(*Cnv8+xAet?#~`w$||q-XOEWUSR8w_EX3;8Q~A1^ zqcYC@+1G6cDD6+|qkKP(ecivW^4zwt67TQJo|jYJ&$gfP+@C!r(j$9)lplM2f8~+Z zp$}W|K1sKm{b1b~xnF<9^51oc;?(J842;TUf{PmmSC@;&i7n1gM==?|bB?z3dm-xpOio?%~ z|NA*=i^s#kdsJLG?y1l7>2p-u`^tDm*G)I9Hr`+V@Wd!^KhNYJPbm)Xf9jx1P~CsN z<@jjuYNt~^*HPz5adBys@!eIq-Tu$dk2r3_eb6hF#)G@Vpa1z2#i{pI`>O6Y`qXYR zcu`CF9>uA>f*(&qfBRRN4&J|`_#VY2+$xi_{wU+eRhJ?2W$?~8&H`^}km9M&BPfm> z_wQJ@NnhrI6V-$7P#igv?oLX|kH&M?z4O62*O%{5oZ@uQ*{bf-UpsCwIFAP?j(U7h zTtWwBV6cZjwx~BK2QBVK#1;tpS$bNQFq+l2Hrw< z#Z!OoPH}Ypv}jsrQr)OMB0p8yN8m3=(7*D(`wUuq{paSP&79!AhUVqpf}dHiZc|*+ zoB#elrQh6irB(as_N6|6H}0cyMW?PW#i{4ZM;AN}w0rsqyk|A|#}$gh`*U1%71HDR za>WW?!Rx<5@znE$;;5bUxekrbxk9@^Vn!w=geNDsa-K;2%#Y4v%wgx^$|~Tf7RS1#j&zzDIG`PB-0yIoQq<`Yp>aNfG==;vW+e$>cU1l&%|6jwc-DURxQH#?4(ejdfZdF#w?FU7@l!~Q6xn*aaa zDFxoK0KP|Y1ABOO_o+CfH|2LR8uM#0x;%IuHTWLIg}Y*Yu5WR?9JH|kZ_f|@{Gd1- z&#t=jj_6&_UkN;aHlEe}KygYtU3C^-c>Y&8og+9G2lM-r;>dZV`uXuHDV)H$@R;vV z9MwI1CeDLOw$;G-)RKQ3r#PGkZo2C=uzeP;B2Q9Hku$96hiuMbYbLw;Tqr*3C6)z1kWnA{k=20!>7#rgXT9Ms$UVQT8nSFSj| zHVkeK-tNu(c2XR*vxREhFU-&ioaQU}c~KmWdpF&}EI2Q}UTX{9_=9|p;tcD?JI|XQ z*BiXfyA;n{=XtBvOatGJt;6Q>JNZR|zc;Bz<^S!JdNTE>#aX8 zT~in5&%~U^!0T$qKaZq1wdb)D$3sW!S`t>Vokw53*tJC0dJ<{?#cs`cf26o>mJZaVt84LYy7+43uR{RAGx zsq=eS0N3yDC4YdI5W>%o;?!Q#5;$)k`|2DFuVXen<9ifGp1$`}>Z1O9UZ@NI`+XUo z+3z{K^0@18eV_j4{t(g6qjc}@7vK{VFxYe8pyb@obZK^~q;{!4cY3J0ul>}`)X@I; z!u)wcaq97C`2@#Lj=Sl=+x&)KPl{7}bpJe!p9wQ9z-!uopC84c=c=n;${O|$a!k$) 
z-j(zGc}#Ko?DgyY_Ow@$?k~{irQb*S`O+R7(dTs(N7pBMPk+3gsM?pQ@~9Jd2bwE+ zsgI`=N7p&LeHneean?9*W=-@0-?k*bUnov}9qW7q?~i}NOK?(Nv#rP*eHmoo$Z_(^f$2|eJt_^ayYu9^mf zg4ZJ@zr7TP_4~cA(+WRNt!FzIymN~bPklV0II3HD)p^3A7h&M+Ue3>p;?#9>sfcy6 zYrX)y<5~FQgyLvDRPuAx&HaU5o@|T3%duba)a|4=YG(!2K1}zb%fPAOz+VR_PMz0w z)$gGwv~2}=b;j}60g9u%hN$*^Yi?Ktj^`qNUKFRk4w$3*eH%HptOf7+H@-)4=ylLN zRef$TaPkK5zO`4bUuiv}b%f$94@4#H*N;+tZbAEHv@XYA+XU{?OZmP=Y%K@`^8O{ z7>{0o6Z^qCTb`dE#r6CBn=18-Cym3WTXCM%9CZl1*t&d=;&6Wa{=J>5pC9mEcMQC+ z=6sLh4EvXNJvuDI4e*9rE1vrJrsZ=1o*!{r=V*8y7n7*W8yTN>ywAU^82HCl^VeD4 z@t)_sk8uXC@)rK_iRba!-+t>{@S4~u$4}YzZ&jV|G|adP`~ia$S^fC|#nJkrAFsT8 zk9FAw-j0__gt_v)G&u(R3fUE3Hs9+_euC%LSdnD%<<+Ba;dGS^$M1wE$`Mk=*N->q z2RdJM>6#V%BU=?;OFekrVfTFCwI7!}kH$6ScO+{u@J6@ddlaW0Uvyqf-lC%A!8>1u zUr&mopfWsx$R}j&-fUUps}LFU1+QpO^3KnD*e^pCF$v&)=N33;46g%KQ59 zK<%P>*xm91zw0RN_279~D)a`gZg11Pk9U2+^K7r(EaLu zenWSDy?Nt<=M`~S2wwm0$@A3bTUie;0nfve?@^rLc;MytxZZN`7JKtOUVC}{UnF`Z zcmcf>Pd$HWT>fqT^88$b*Mjfd&%FNW{E^@n^pp2_^$z;J8T^}lwbz^H<=C?gyllzv zc=heDc^7zJnP;y4tKV`j_#OI~R$n@AqvKJZHwVBE^X1o<;?(nl)=~1TmmCJ~GaC=; z;~T}PJ(@S*HO?Di0F1ho0 z!^sQaw-flX>&4WMSHRCWke|O6pEv(jUAqpxrC@w$?a%J(Zi83Lg!N6IFC7P{KR2$p z2Yxl?tH&S38IC_*`{Vr{f#;Kq^+)e{d{KV?0p(X?LFy`o`xVRC@k5;-#Tn+un;*|c zrw1=vZTbG^`RTG}0zapVdHvj{vx5Juj(PoBU2}k+Sl7J%vc%lrZ){**zxKZT;CnYR zuOC+03jBp8`m|2ccrUuOIQV^Q@bjm*zpbzO`9#Mx@-y`<4SoO{pX&KUamMCTaIW&; zg;nC`E4!Y~@vjJeNe9JO=c{!+9kksJyq?bT`SRxffVIb{QjdjLyuQqr{KEa#W&}- zUv|8$wQC6eC+2GzZ~FGrc^|cZL+xhZHFD#(pW=+QKh0e?@V1+9e9`Ai{Yd$~ZrKL> zN9_2brTx7A>i4-lc(x7Y`-|s4p3xb63lrC48aGsrGkrb44`<_7=K6$4DrQo$@*RL|;S6>f052Jci ze6$?=K*9K+xa8I&@xdzao3Z?5kKY5vtOKtO`#ejg9=!SI*l;8Go0E~R-lOwIs_&W) zQQ*}S?DtWedi<)7{{tUy2k)r~^Nly3OK*t=zdy^@T=g9~VjuY11ohy}XMKGmROj=H zoMOOxE$}E#U0)j4)L&We9Rcr|puQCMxB0-UZ`rWp;6G;TnM{3oe&If+!FLj@Z#1s{ zmOpQPM0Gh2em3T7na_HUj)PQh>rt1$8!K4fDNfyg)DH66_KpWHr3vGgSKm86H^Dz8 zST89rWA&wd4az^2_g(Pg1jm1hGggmj!yka>(vyF@)0!{MOUiez?-TGVd22sD^Tt<| zYR|zt;HBN;_1EbiufU6Ep1I~*iahVY4>IBWl9&Io?;pYU@sRI7oy<=8cckdozIdm|b6MU4Q60kn-!eH7j^o+5Vf_qd3F- zc=deNDi?Sk%bB;o`b@|R{?o$pzP`THE~-c8Z3VzTQj$OaD9%_taz8B$-WD_0BOyaE z@b}os*F)c*bX=!;l&xJ7{K8fFKE)YZ?`fQqKWTg!@cr25KkD&EafbEg&9@g9%7a%< zuwN?j)a!ftvNqs_C1d}QH$VOftpr|Z8-6{t9*<}oQa$5~RtA4vNA2TRUr#!&lGpFD zGk9ye@I8uC_m8@sZUbw8XOWEk6McSk9z^+-Nn0B{e|CIOj~|LN%#YVU2`lS?*Tu}o zvzXwS~zFEzpE)#H6Jckn-%SPz#|Uf{hkF<-tc_ewj;q0G4u82cgxY>zc90Z^2s>x zMf)AH^?4sP3H)V({S+-eZ~V0wH4XgF%-1sicwYb7q2MKF|CZ+sC_fjxBgxng)_XJ$ zY5ra-G9SG4g7#9JvH2ExbrE=(Oq_35#w-J`feHQ1TOVB-Mu4ADuz#jyedNvO*;&_s zze3<^8LvFQRl<7kQ?vUkWsc9h@fLk!6ZjcSynl24s;%JBd@xtO!+Y)ozk4$02OS4! 
zehhoR8@zmi{X~j0oFBaYIyQVic%#jHf9AihV!&@On16ktHD8w>N5HE!+`Q-4k@sW4 zPchLnpXMp`U+Ofcz#kgM_bJZU`482N{L|&nf%IC`)e_!Wc z0)O33zAw8T&Dk0ce%XEUzJ5KT>rAT8&26{AcZ=rNo8pYk#}YR8z^k`XK3{#kLsdWD zb$il7@GGt3`xKYl@pmQh3HZ~a`1PQ1YgiB7dVj3#bMUh6((dWouO82NVqbxGV?V$B z6!*9G^Tu;&r?=q0KEn59$MdDA58&%gnb&Xc_Z|H8=gsTy@1S!w+%KPG=K0?2O)Bs+ z?2yl&Hy%GtP6vKx!F=b9Z{B!m`O^ZtZF^10*HxE$Os-t|zh|glpT_8apVh}};D@j0 z|2`awQ+^N0f1S47mf&c=q?zm7lWz{e$uB@46VvC#u~&a2`bSe}AhnXH|!X z;M}oSet$`F^SLqf2{^0R-|0@S(>&oBIN#sP<+G#GD{u~#=KmdSWA(Xw-S$t2<*}mr&IHl8ON&5W`$?f-+ZmFsou8Xdv<(!H6 z(t-2OE4g+JS!V%Gs<(1ZyjK=*!m`P=tH}3k;7l(l=d_rY3!K<0a!xtteBiupDd$YP zR}h?Uz2uxzGp)cmJVMT?ShoZ?E2qmjcfXbf=ldc#C-u(q;MlL#=A>2~2U{+-0VjGZ z-=R3sad2c;dvKO(%!k%lIxf3ys{+o~&HQ{QPLz*B!|LEn9U|v^%j5!1_i}PhlMi*l zxm{VCQ&`nsQW0&VY11z)Al`?zmHR zL2qy@?6f&lFRHUm$$sF(G~+uIM|Gy>HJNbqppw5+)?0^6^uiQ6XmndV+=UW%F2zyR|O}4Q?b09 z9K7C3>D=PlnZ&IM)efU~0k-=TJhoEOoL!FjH6zeT;SUa9;HoV{E4`B0oFpX*0n zf@6P!@2J~FF0F^-JG}v?gy4LG;zaq(>iz+o?s??K;fx=j!3qDVtzFbkYFB}+-@)nh zQqGy(OjpftA8GtoIVbJCRN!>ZDOaC|L(_rdR!Por%%2gQlg;Fu*h5*snWOQ%McrS8 z`)3CyR&d@zaiVd!v1M*>@=uj(*NA-i!MQy~&S`h45ICPF$T>Gx76m8MY&mD$@RHyx zm?h^7btntY(}4(KxhE?FCLs z!TCJJiRS6Z(>~x-y`U{0^}J{@ED)R{A#zTj?ND$+i^`oR#7`RmPDX1v=l%0waDGXC z&KNUhEI1vOYRiZEkNSP=&k5kf?B+WZNBvIc_vCyInF7w32YiR($WixuT;>_z6t|G; z_m(SXffFvdua&*-Kj1WUmCNVLzw^O~_mFe^_AdtKZlE?NyXrbFx_>x0b^&~c;za$m zt>j8@E=it`UAnOboM$un`OtU~<#TiP25=fLk#n*(-waM$jpqq8PiS6zDZLGxUV?oj ziW9YKagJT!{MaDZuI1AYg41}JoKv*b5paGj;m>zt{T1KvI5-_6+xj{6t9b@hKvNi)aX#}6|7<2BrX9lN4m|Q-gC9{EZV1=C1 z^m$HjDoWl@;;ohuQn`EvjK~Ewt#bo#I`5V1uc+#c!O3?- z&gqu2IXHbK^J#jlB{CdwNbX0? zx#10t{~>Mlq3cXqe<~d717HaGFW(({7j>49+jf`rPa=7Mx5P z_cPV)+UYnEoT{g_jYGB5v(i*>JT>M+b*DPtOEnXm=aT0ed*03lXNbU|c2K|5eR7mf zw*6t?v=^LjP@Kr|8MY9d)`Ig5wL|$)J`q)xf)l<;uD@D+S`JPD$?-hy?P_p(@081@ z%+B@T)Rf#mT-9Y0IL;cc2dVB<=aZ?of^*r6-!6)yepmO`m@PZN**%!=P@KqFIA}LG z9VW>+_agU$GeUCzaQ@Uo;M5V+N8K(u4pE)=I2{9LV`qN5C{9$L+7C~F^TC(zsPm!c zl+Tm-XTYfxtj(e84RVGLj|1nU;5wJ$MD5z?bQzqfiZTPo#493m~+M9BRJ2t%jJ{i(-&}@CC}$q zKKKDn8OiIE^=nhqFx=PrCV4%Wt#=x57D^sZdzH@s&UndoRn43koM6f8uZK4+!D%Xa zzS<%(CpfJ&wu{z%niuv%^Mcb|;82`sUTpU+2u``fa^pF&r4=|)lIK643YGw;yX1aL zx!a||$rdTsu11T?gY#T)-DGUtXw=mPoDuuv^6@Qc4^F|Oa?Z`XRlwfRNc&Vqd#WAoIbxFd(56(dcem)dO<2jS+{-`TA7J+k7a34O!k)y6p z)-lV#iIluQ;X>I6aGrMOw~OAN@}cuZs?XJ{tHIF)@g0gIhg@<(Cani&#bmxiaUy4& z&n9r91lN_uob^Svf^%N-KKx~=cY?oYs(DY#xXHhz|sc@EAI$?L&uPhWx4Tk^clHu5bvPbJ%xbHqn*9!u_%Os(@3 zoX&#tLt}Mboc<>`70&XPXkVhcsb|rqzvG^yrIpZ^ADQu zPg`dO=ceGij^ae~{b~Me;0!;fEgz~oTAVos>_+zJlP? 
z5UkG>NA;oOD>+prSb?)purEPz zU#U*ofYVuW9orOb501O!IyPlw6>!dK{JB5sKkE0*wX1;>EEqo&C#rLBs#@U8lss?w z^1Ti?XC=>D${lS0PAbWL{yF0sgVR>>dRfO*m&{%Y{N zEjSM(`>Wa6j^IQ~?yLLn=n76xjr&^kd%LJU%SL*F(^#->P@Jef!yEX3Gfm_AOxL}X z&!Kd^!KoscUlb?Gr_V`WaOOzvx168m56)wa`z>_*pmr^89RN<3GyLNz#fjQAsZ$U* z(DehxTw=kOhMyJ-J~@_BP>9ysj<`y>=6%IDeS` zy$l^&ZSA6VQo9_QMS%0Arkt~1w+5VLt>v74Th@cK(NoUJ=Di7={E|QC(mvl-a8_u% zZ%18czoI+ANhP>XhT=ryr}oX==mf~sXWyLt;G7;M=TvBN2%MLaKQGhG;uttz!nEZ> z^Pk3Zs`Dqn*&|rTC{9%8pu=at>9tf_K6L&{`COkF2Tr*ee23yh`NS>149=SUF3ho=EIFWNT_&GQ(edO|aZ~q#c0zk&eI8ehvG!- zs`C0HI2R<>)k@31g0p`(KObXtuF?J{I71}YpKRGu)-v4J>NihYJ~Y0m&Iiw?0cX() zzC&?TXIhWQDX=>OI2{H1*c3+&^&dIz!!m<&a{)geiX(^WPL4&jY~a*h&UYwIL}%KGaTXm-po&;50bM|G8|66SZqyaB*;&o#H#{cG2}1<+HkY zX>hLG{Hd3TmYt-)zLOs-vTo3{t2!&o^dP3q3zj1Sf3(0WAkqUeim z;G`9tuTq?7o_1a9g^tGSW%a!H;n4$}M~k(!i;id1u2xokz*)VP?@*kmU7s!d!139_ zcc>lYP`xOh6_*BpQ$q0bRuo4Lx#Zkf5(rKT$>U%;kD=half1vKy3a^(GHlaUA9cGd z%Z~=<*Fs6B$aruPH0~o&-Kow)?@t0JTrj^VPSjspBc_29tMU3&U7tOpL&51On5Psc z%4bB~x!}Cgc)m(5)n{hf`QX$MTrX3cD4(X87lZRfFfWWb^=~c%$3=3Vq}@Le;CKtJ zyN%_OxBVJ$N(;`PjX7HiZU84j<9H^Q`fEq|&EP!JSZBK4Am`58t>C;898W1u)bB;2 zc7jt?vd-UE?*ZqQxD|oNLc_C{E<0eRLL_&OX{4I=)gq=fW<4vrX`O@F-4{&)23`z}X^s zzFMQvb#P>VfA0>9+u+qVcoGGYdE& zlIKxT6|#eqVvSt8>O18I=d|SceAXZNz)7KTpP%YQb+C4Zw7&v zZ=ifVdH%7m5#YNAnAhLYBm{gvR^K$Nze0W~|Fn!ZegExK^`B>^ap0dC%&#}a8TKEq z9#fA^1aH<5^VVZR=v45dgXDc)Ju(N+1pf!~)$>DZJ&rri0dKyU>(TUQ82IZ$p@l0`s{m099_@FJ|JvX5q`th+;})EZ@5i{CaGDsK=j{{^R*uXT*TN#sptqZ;vlgaQr?v z^C<`L`mwJB7j)(WJi-Pa*iSKJ! zKX}L6=&L2cKbDqX58m;cw;rXrSq8j}%v0AxOTN5%`^+g1{?;t~@gZC9%dxiLw zwdPN+qw)9Xh&}k%vYXd$v$Qh!ZOxp2^~uiQH!fq|{PXRr4*q6)d7n4lFFmgXex7QI zuRh<=I^Ites0-fBI`aAQ=F^0a4ZuIsSl;K=r!SBI*E%nfQbey2^J!)e!@IqVg z>q~L!^;JDzqAXj2cc86#+y7%?8}JKt=I6^B54?IVN!J0q;qKZ!-g@`zcxUj|c=FpX zyWX8^?GC=T7vHCT`P=ynuO9Cnc!7V=CwcXt_oeX_II0JDPkNb_FP)c=pRq(A@Tc|V z=Sy+M#_y#Q{lHt<&%F7b8ZrR> zp)(?`y=j3^0o(JCA;ik>E;@1{}NBfB~=gazh zY2SnL?K@{F_>BbD?-ZwQKh=diFP9bIwNK{$oVx$(TC4`I;0W{1w-Yzlfj@xxTGtzO z|NCv+2wr!AM{$PrcY_oe=tFl7h$sgsc}Z~VJ<-VNS#!FZxP4BO8; zKj>n;AG|37kH(=fFYWY0-~}bato>cH{p1|^Ec)>3I1~vd|rJgUOxl=IJRG< zW&HEp9P_a_1%Z2L#tcbeu`<^~$l+Z@?el zjo*K~dh_}(e#QszoVuFky{-ENybC6JotypuFK05YS9$GqsGh>XaDR1Im*ll~t?Kih z_9@eV*Hv(RPH~3E2j2J@vpYR_@ywGM|Gf66>XI4!@5yX`DEpj%){{iH?BLCB!>=dB z8EgNe?YY4_ZQ_1Xgeu?4oeP5Br9MAjiZhmPk>Xb1m2^@(nf~I9hwPrk!5?a7ziLb= z@ZD?4=g%8&4ynq4zqB5|-n{jSR}ULkYw&`b$mh%R3$3&Tzc>5*Nv1z}^%zme0sK-O zOshwUSyjO6#^$rR@(s&f4g9wr+Vj=7pRU_zy_mkTCV1iAe2?Oc&A(9ZI^aF%DL)=~ z{naXK1MoMn{X8w_r+QC){&iw)Bk)G_;kTdS4BM~w=sq;6XSN~D!0RdSD9+I1jsH|G z9l#sQJadgFmjzwGuhxv;ey!`lI!6!i^0}I~|BoK*4t^hYJkrwty!vKa+Y`KHto`Py zua8?_@HaNqUSIwAqT?QouZV|!;Pq;bYrao$$vyv$Jvm}E6#c}cuzX<+pp#NgExLN zz8?qv1QUE-Jy!3U1pa5{%huz?ylLPqV*O>V`8K3YDEP10=YO*KR{k;vym&L`dp~*} z_}5K5|A?Nl2)rd7<;NRuJS2uJ1AhkdW$XLUIRd;7CfvWon;*5ltOj3aLOpoC#qRas zpE0xFb;u_0XPYp;dHJvEycK*eGhg3+&#)8xX(p~8(^BpMuc!&<*SvbPn6@AMekRmg zKY!>tn~sM=BVxcmB-oFkIAh0e->4(tWia9MZC*WA-Hipmwh8;6Jb%ceQ{a~zCcj?s z{AnN0fgfqY=bgOu@L7sW;O{qazdXBRJa~D`{PQLE%H0J2mI?P4@akQ%@LllB`kJ>t z=cauC{t^?fhv+(t=I5%HkHMeF_M_FuH;OYhKMS9E241k4=cjGtEAVfbaJ|jzPy6F< z!LMk7&znD~uY3gmS9fK+sO!x;f9K^}eaTnwx|y)w&-42Q`~*M7%>Ig|DIE>>!~2=p z-@P|2_($3Pk<9rwuRb>OEWlr8;_;LE{r_k?53ne%uZ@qrEB3A!djsrUv14CrumU0~ zim(Fq0`?lk9y_+!mDsx^YU~kvO+wUIqDIu%+sE$AYj|(&d~B}%d7k7tul~8qZURJm?ae zAC3HOF7f>HsK-;*Urv28pZIx-sqe zsrxVEXUVvl_;)_zex;0mr}H}Ew=wPytF&iX{nZ<>k@(Av`lcWZKG5-ekoh|}<_hs1nmj)t^H)~?hYsH$UK7JU z)%rnP;Qzq!vBY2YALEJph~PbLbenh`jQu0AzM&KA2hV+j_cr}K;^i@^ALv9L&Lx0% zH#}wKgzM+UCOqgwo*2IqE7KCMh4H+e=r42$d9wPhlQ|>t@_xp6DeM2-LNgP8ZGFe{ 
zU1sl#Q`v}j;xq2&%lMVX&!o@sRPyk1s>~>Xt9d_sQc%{C1!5e5cI*Ims&%|M3?-&(<%j z2Jx%ZQjd>pe0n#c4)KedtRKwACp;I5{&cz4fO!2}<@Q4N|Lrfx;{DpIG4bb=l=HEV z@L%~^1~(;sh|8Dd|GK&b@#|Lkviz#|EX03k^e-y?fvo=;S+yPUFB2@|EB%Rg*<}7A-^&h(B3{P^UpC$+^F3|+$hSyq3C-s~aX{4c!zc{1z(@%MkmeB2fftiutHW*>ea{!nAT zQ;Y|63F9G)Z`jBq#4BTR9@5eH`lLKgywqQKzcg&wDdJBwo`?HV^Ro^uV~C&E*pC(C zD;vLL`S$V2dEy;1&ReM3i+LRKq12Vj#J^|kmx}g6C*}k8jlgrCaGiK@Ci}V2i9Fng z0?#-3pTryAQ{JCJC-M+y@WT4MAYQKjIKRTU0^XL*uZdUnGwxT(>VNvqcf@a5=F9fK zSMzIC67IhxtE|qq^$*As#3xhJWW1A3@y31O`(10!g%dxYN&f>s#rB8KNrwNW8jU5M zQzLo*2wg&+tbNqpHGz0tjOT;J{KdTc|I1(5c+;fq6yl%w%=uT*=hKK+`7@utg%r9GPE%B3l24B{G@;uu>e8fl07qk8YzeoPn?-fP7bSC>n z&?T0C53+9~-o($?FS7ZMc{l8>y=5oy^BC(F(SPU?+H2#X9f3Ee-(KR)@|O3X&?V%_ z^0#=>gT$Nm8PA`{>c8pAABi7gvY#WXFBz{wjh~3O!DK&2#*_J9?AmeSjqv}n<9(m` zr-^^hIFBdRzsz1){ZHJLGHN<156UV{(2By2SEz z_PQIy%NY1&>#I$#SmI|lIq&CaeQk5QO}uj^JemKt`o}s1@$`Lomw2VD^7ap1V)2}? z<^l0a_y4l-EV}eD@rxVJw>pylvV1MvH+yN;AB1G zUpD5yj`Sb0eE6+tBjWvLaz7MyB<6pK%T0*)@H6J8vV0gZwgvH9e#YlR$l{^3Xifa& zCi5Rh<8kprJL35pJk|WgePrZA^Ddo;f7fJw1zp1Wv-ywTgLgEd8}U+`tbd>rdH7ry z@VfozN4(p{`2#Uup-ae<)&H?c{=}>P8Rws6^?$EnKjP0ZIUg#ke;IGtMl11#Hvm7P|j$zf2b2thWXczr|y*!ycxuwWAryF@sROMyD0q2(BzJv+YN zeq8+bUk|Etxmv>YbK`sZ_>xrrm=O*63tgJ{-^)0qE`1r!&pubkp7RxT&;F}D0ky6Y zr{Y4r2L3B&@V0Bj@kx=|9-05jaXEg2IDbdlB58}I@nd|?Zenp>(%?7ZELfJ-4oHlX zT&VXh+D+nA_e>|{KxbC(x9;C2&egkePGWxj`SLDt?srWuwMXQ@K3jXX<@3%G4~XL# zE$2XI=GT*eznJ5c!6AEKAMAPb@dDnvetX(DNKxY=8 zEM4Cb$Ni$5gLs&6=G^&6oTG2mIHiXqtDdkQ+gc!_Qhg1om5Mllx76$jJCT++1@5bH zj(cV#PT{m_e$~#Kg*aDBsc~xE%1)fI<=qGkb~;EkYd2U^z#$2iLHtP?-|M**i&%W4T$HI7h!#ixv%3dHeQEayOH7Uwaws}QI2cXAHm3J&TIac=jnI&tPi$vM!01MVlz ziQ2?Dwp-4D&Wz(duO4yA?N#G!9omRE7xy{FL3@K=OB*yL&b_U24s>RI9WUFGIF)wF zIbwVeci8jdacklXS}*572Ya#zd02RJd*Te;BIiH{4(x+H1FRj1lT5L`=2YlPoFzZX z?ZNw-+0*QmH*vBZSL2+`=SQ5IKdW)R|D!i?yw|F6mYxV8PR@;LoTjg=#0guW#+k7{ zlsL7Asc~)&7)+c!;f`^Tm&n7FF2jkFY^t0Comu|0eGo>R@$=-I#PYD@#WBR05vj(p z%$q=*=-G~O;3wkKZ{`%@L@ktapfih4skYOJQ*EN0lURJtWtv5twca~oJsT5 zI47qpAWoI#YMcelmk?)>N&lQ!e3E8dPMpRj<4$6nEEiT0=f^#2e!c#EEpeQq)i}@B zZzN8dm1>;t`+ZNGHmlV*ue)z2&bUo#oYWT`R{hT=EJk&Vro4qE^&C+U|Q1AD|S*W<*lI`mxH4^SeHGL$nchrwreWmbmCeF7v z9OIxrLcLdbU6?o%zn1TdLT6^r%)-Tqvn5E*Nz5W8iF32Q z8pp-G9&zr4t8uQMY)G80JJdJ}1~nnhxyx#t6-inUC%xi+Z>k6jaiY?x^?!|d6mgFEsc{ak9!H!XebqSA+D#(PFQICj@hQF`&cN|%9RIx$#Q8W!jk7XzHgWPh zxL=3$2FAg*<-R4(Y?E~^bY|^++lz(7>99-9uaCbiCCDS$r;)UQZmWqF-*0HxXyuVYxk`U$}-}3(ssJPKi@$oC#<45NED~ z_8^~NPsOd##0fktpKm~iIAh)kPKf(q;v6a`=RgOJ7@vYGj}j+kYdHrxGtSsdCy6s~ zyc*~1*q@2hf0r8PTFP_8S^0+=r{VsK#F?00Eq}^)x=NfoxzspUpWh%(_3~<*21{az z{3W z$*p+b;rsphh|^<@TK+s3>r9;1>m1{tA3>ZSH!DJ%>n8I<=nx-VgYz+W3E~VjsW0fj zNiUpFXrYxM&irk1zn}vL^#^%r{7YE}ba%%0ZK%5P8<@P`ad*CPRNiv`cagLhY z2Z0V8aKTyOR)aXN6x-L;40VW8$0W{pe=~c^pQ%TjnkM;^7$?W{M#R~$-LW`>i};+h zG$qa@#d-MN8ZC)a=7^d-JCd{^&UVFh(}Bb7iE~}iuO1US5@&>i^La7OXY>ExXj1Rcfg{#?*N=mUqgwC392rKOVKMSJL!3?~@kz{{?2{%CC-kVAJw01ZA>E(dM>3<@B-o-F=-cgfB1!c9N6FwXE3anAbtE#h>Zs^-^+$h*Y3u6RFX{g?;D`D>bE_Mm@+U-@c2CeC0Jd!RF` z_cKqP5-0Oa$LtZ~d}-?o;?!TN#wpzY4RHp3r^cD=@_{(_9Gq{6ex1yav=;4aDLxPK zT}%q%RB-VA4(8>kcaI%uh;z$i{RN#_y#mE81l}Hlf70^;#^kUf6h{pIBUAg z{SxiLIE45VK3Il0{Y}n+KxgJx;l6Ifx!>P0dte{zxm~0paiYhlaon#}Ax@I%YMe$h zYY?Z3;{J2N7IlbIPjP?Mxl(=NJaVxA5#zi*TVvupc5vSq;}GgA*ZHQzxn%NO33O)p z^JscY;+#_4KOEY!4RJb}?Drxbum|lQe$C0$fjDma~IN*Xa_*_Tg96coGKxf9W z?CeUM3yRO9aD{=_+|n9o=51Q5s1!SModM}3`&wi4&H zVw|6j3nk71lW_;{Z&vT4nhYjRdXsf`Vw^b{hZ85j!F>R55udtm!-(UgxIeb<&=}&p zP>l23NfU^(`zN*b^?U0n#Cd(vF%J4Q#HYrM>BLE65+CTy>iwIQvxw7dtYh|w^>ra^ z9&sv6bc}<34Sr2+yO22jOwKVv2fuI)&d4-NiSx*0e1{Gk(XWR0mlNl4gkye*obAiL zBhINwY8?Nz>xnZ{@qFFm44a8l*knHeewfAQtE*dxb3*Z)@Z7K+#3|QIEj|^B?;*}< 
z2hXLzPvlS3`TfK(JC_2TS$t-X{ed_eO!7G~&bw+yh*Mj!-n;*Oj5za`tHn9$?J436 zQhYAEMgFtInLb<1o;w9E66b_fjZ@;!E5ymExWC$S=?&s!>#AnY=BBa43FzY(2mLpR9Jm zeXU3b*E?dIr)*9|oER6mU(lKP727HuaXj)k#zDML@5}$pNSyU1pKAo2nLYEiWg$)p z#dDSAC+8r}q*iKvc~;3ooF*;goW%U{PE&w5Q@Y7HiPhJrJ%xz#t73mRD6lAT3iea8 zr(=PV#K|;Fjr06U8REQ}tj0+h;YOS@isSpEkcz}9qBy>9bE`_69P`!uvOKOqoCAvX zTw7mO>rO#eJLHjk*!%rpbMf#QaK`xjS+CZBX+o;JP1ihA7@| z>V3nXINcQE)8#`y;uKcge?D@cKXLL7R*O%O!2^i1*}-$>xPO50{STKR#OXOjZVz;3 z<9o~>!-+G`!Fe6>3HBUV8cv)tE9Le;XJ*gCavx z>RX7DQt@0t|M%O8b5d~}Y<^%jalTVLe>iB|e&Q56=vaKvKce0*H~E1$f0*2Fg3heo zLn|I3&c5T0*(0{Ey>E{ZC(yz3kRs>(ucwI9>IXG@+HZ~_&JYLZ8|aS^=U2lo5NE3i z2RgGjN7T7OoIHEf{A!i{260X~IA6s&6MmhE`HeWoP43G;XXe+)pKcLn`Y|=X!q(j- zP7lTQwItvHandTDlj&CGF>!Jzu2aT5dPM8xtxP> z#*E{huQ+iUYmRYXAN&g2U5YqIbIUo4)$Ok1J0DzwTN@sOU{ALjB~ASUE=Htbc_T0 zV9yrkhQw*zM$Ul__TUA2Rd+Y-w2#mGruOz6_fj4(19byd04Z~#3|t5ItBK@o>9%ch_l*cJqVqd zJ%42KCC=jM^7z0HGfu#np2WGWxQ|_9N+05MH(6JT_P|fX=WOFZ;Sjp$5y=>K%Bo!)|Jq~9@q!Jj_w>noU7~P_CN;?xZq@7HG(*mOz!JJ2M*#6 zj$hAF#QDo451|7GTyVx&#u3MBquej(zyTMWc}|mvGkTev106VMx8STkJC!(rCihRF z0|)kjvm+vcIL;>d106W951jL@XA|dJ#rqB&^L$I3Vv6hBBiz7aO~i>XSzkbBnm=2&5NE<^ zwffq)X$Nt3o>SutU%H1lDIHv=pkG6LJbVuj=c&nlICN(5c~}2O;v{o$9{}?_*pn>H zPsEvIVh?m?_B=dxoH!p$o?k|LGUEhIK24nBiupXE#aZHvGFcCb_8>12pWKx$5~q;K zJ^*xPemT9nLY(S~`%(VUH;D6xiC>A?^VRrR;@nhR=N@c+n>dvnT<5|*#MwXlJ>ujr z+24T9EY7)qdqA9ASJm44y_1iL)8U31C(FEN#CiF%V;uO2`25xFC2_tzE9XFG7N3g^ z-xBAVNu0%Yfos^)z2ryYTr_DH(7_(q2hRGqr0xm#wN5GSua4TDk~pnQ#vRcfT*ICz zE7KAulj68jE+8Xu_8ySe7v3N7!8Pp3UNsAGoJ{UtKxf9;oFWHt#wpH!I-Sf-oQDoR zUx4uge*Li`KXD44lE()+Gr!jMEkvB&iu>y8$`>WhJQI7wIAcByzaBf6Bu+h(b_$)D zUt#yl5T~JHz5ja9jW{I~+r^6U6^S$1WZV(sgKNa6dY7uiDXzG0+`VQ^;w*6}4=q}@ z0!0SY@zyk(J+}XXb~;tE^*`72uNBtt{len+6+;(2ucuwF*PBIqvaclb&SZ9o2c3Hq z^E!7aM!enLS%`N!O#dcb(LU(xUn&~^KiGFsO^cT9I?tT1xL!CpxJMfC@_j+In;w3Ze z5_!-?Ugh|mFU9py_qS5SE3``$P`{AS5Eoxv zR<=Spx3Yt)R`#k~v21{U;DEtpdj$?~2??^ zV7cv;nkME6boM{S|4-+6Xxls{6YsKDW#WaHeBU>8uLF0RNCs7xg5|c#OjxL(?z?jl+n}f z58AD6XT(2tMz1vbCEXs)SubH{sXY4i#ij!v*#7k|JH>v(W_-_{{_)+1OI4S0F1EhS z_5*uLuIfnkpgn|?=YHhGmQGX;#~bPOpwBAefge8Ff1pEt4E&DsV^DT4;(WK$_R4?v zN6^{-82>-;ws+z7b+$-%;(1J$^Pr0i>1od|o#&yoSj73}x7&|+8?Wg+(NE}LSNkaL zKQ6EJCr+2ga=V}t$LCy+nYX5PKjMw}L+4>0gZM*dX}Y_TZT``H#pRD-@m(G4d@hWq zXA4@1d-k>5PUyt?I9HeJcj3NZ;)VFg+beYLV-nVfMf2>&yoW&ph&SwwoClp~U)LF& zFHJHGAzq0o>Fg4*KZZ{1kFypiMg7y+;NiqOzEsYG&MVMwaH!WnAJj(&f3|P?_hH0a zyh_f4PV_gn+gKW}t#3vX@7^6b54xWI0e;Rw!OpPnc3#fI=rt3F7gR+5{w^_o&~f}c zH2;&_-s&{?hIs35==O>24LaD>Cqu&O6IjydEpxY!30fipY7; zMR#MK*1r^VxEkD$o1;#8A-ff zN6UH8y$D|$k8~e^`uMd7^E}6{BwmHVavpS?C-L)?cIIVge#<$!S@ORfZw(snL?}@knH+eq=ooHXP4xGeV!tKCi>Kj_5xeVc{-Eu7;w;#I68=Rqg-ds{MbeVlB3lXwHG$$8MhKHIBk zz3yJ#n7#auy^WyhI^bzRX%cQMsfB7^o{vzx3_*J-1KrdIX0sVYJd&e(;Y*&t~ z*8iDj&%AQW9tX_(_4ehV?X&RudU%h!6t9~Tx6gsq1^m~}sPw(@9I8RL0ozUsg z{AZqh`$z6SCZu>wy!2V+^$1<`k)AYu zBJ#LFhqzU2%lUd$drO>)v9?$Kdwl}kzv3p$mzv-EK)eixqQ8%X{$t44G{o}_m)9$FV*im@*vIbuEIsjl zsVC<_C))Sh2F{c4o0*8$(b(4!=daL-dECK;=Q(roWhGvjlXCl@L;q%ra#GErtq|h3 z^XnYM%ac|+dg-)!GzgBS`XFolNc)xd*+Xr1@yv;E~iI+OPyniy|z3RsG`^tX= z@hY3_??5No7yHJI)+MD6h7)h*JGp((x!>$b^NM?UdENeQ^%&xXy2|4Romjv1g!y{) z$nnIBULxl~$NqX~70Pmd_vZd2;w?$7=Zo0iL5Fr$Td>RX%s0eo+(@2x(6L<>ZJxke z6EK5#WoyfM(22b8JzNj_x-BH$VUvCUI_7z3HTHAASLO9G;w|vj>p_egbcoyP*UZ@; zyM{QIev;=KbYlM$zFueG?ofq0|K$a&ClzIkZdEAu$3cD)w|eF|I~5B zJ6})kCv=e>3Hvt>Ek$>3S4Tg*AYT6>azCMipBNu8ZhdI*hByr)Z7b=2*C%uw|M-3S zr<}J9bG;|t52xik=)`u_s6CH^&RMl83D-F--Q;nDPRzHUZERo8_eqJje6*Ygo#^k( ze9YT>WDxO=Khe)ii0cUGMBbXS+>fNMKa6<4yp^AafzI+F^E%skB-C&08_tv6IY$t0 zgOl7o=*0NFSj~Ct-ad?Y6H>_Q7dnymv`u9i_dQOHB3|5XIS)E9U&_p4`OT1fc zPHiNy0v785;7{ARuxP5eZKaF_WHaQPE 
zk=IFB#}3~*gLt3y7B^M$UsytY;5_x4YD0;=QjgpGQF_-bXns+%GTM zDUx{BWpW;LiPiIk`74NbKTgh**Rw@)>B@QdEbkiP?wu1<0ca7_8CEm3sdi~n=hiyOM`mu$N(BVG%&5!!LAR*tjUo&|S z`+u+aPU1I=kmnC{`n&$qZzImIw^ENi#PgdV=Rqgt_ZwkeU$4u4;<+D}^Pm&$Yv{}V zj_MmtycHGY^#dLAJhW`WygpgXQR1bUqw~ah8+5i_E~)nK`&(GoK(6q`9tS6+T3PX0ae{uw%slSO+V&X=<$ubOZ^H2S)n z2OZ~yhxXtY_oH9?rXpVMBRWs?6FT^r^fmjL_eNUc1pFFdbb6Lg}##ai+_?O|9B;uSDy zpU~wu3`Bk4J|X&#Dfe;{Z)cq%Y4MUl1AR%p%ZzCE9{F{>P)=K zr{wX2PUL+h+>h?ErZDlQoA?Wz$g9-Hh32I{Y%fN3byirHwJm~nGt3@l7kIyeP+EIykK`-R>0iBr7*w;k;J`1Wwyqs_3 zJm?bon?&>c*Z95fR&C<7NT&OXd8FC+-PB3n=SDku_&e`oJ{Z-2xW7)9+X{{v44gRent!Dtp8lrk~n<_$?bxU@3UI8 zI%{}-c6?cD;zfQZ=RudSer@BnkR>sRy>I{1lk4tdwpwL5WMzm?ks z9oMIaHd)v&IMuW#@%$di_u-&}U2%Kazqw2OiPP?aeEfmV7BAa97VySqWM1(j{fM_J zrQA>G;HT|f={!Zua{WAoo2`e4|8dd_+bZ04M!5fAeZ;uF@=`b3U%u>lik>|~UA?S9 zzJ2{dpFmVm5p;$;`j<3;tyU2VLmbY;9%liPAacY=tSPz%bXvc$A=SdM_xG(x&gs~ z&hZMY_t7$WCE$NR<>ZE7Zt*Zkml#H&$B&Vx?0FW*x>4|H?&LgI}}Cg(vX^49*$<4Sg) zrNkR~Q{GOY15fYQu6%l)L9>SDwDq4_gt5Z6*qigG=RDku=elk!?)j(fFJHD_3-$@@ z5!BDMU#RUL4)kXKoDUn)B+~A`*q`+)&h2PQk(Csu)DPrwf=>57^DI41vH!#=u8YzC z8<{S|sfH1!gE1l7|7UUP?jP#a%P-I`*e5h7*r9NJvOi<5)slF5XxS_Bb1S<;)=<0# zt?R(V0^~C8gQXY5cO!L#-MUT6)F=DvuHH2}um=X7(bARanKa0DU zZ&1H}L4l4$9R8>D+#rqjkuhax{Z#G3W{Oj@t@3z7r^nk{>%XQnaXOaXN}N}VbPo3I zuulh_Tc24KZ0&wT$0hdCHz1^pb4aLhUoL*X=n3~Obu z#5tKy-VZ`2?kBDm<^y@N?I&KFdU76goM%y=+J_d?S+KWLJN}U}M{>u;MgBZmult4m%@*yXT7u`^oJ;Hdc3e@LOw*iozhQ5Z zk@~L-xYV>g-}i5OWzW6&1`n}@2H75ctDvj@v1jqL1NJz>9=(1&wA`ure3aYYhp9fk z4$QL@>-4&Cvb;6-=IUj4LM%j9v8%MSjm&bnRi>~i1SbA@}F;O*MoOm z;1%=wnRuH!$@2|5#1Xt%C2Ep=ow}VPUQv_hp`dFr=|X#tNzdCNZd=|9?eybC;=Ku$ z#~(V_l}VT{#(sB|IETl{{e(_zr&u?l9{PN^LAyCY&d%vp+|uS&SVQcRd5pE=w8XRfc_ytJ(hYsUP_w z=kAU9lvnql0f9aIde|3=z5Id`f86#{zcY09ANDwKJCA=ZBOTATk7c|^@fulGzOM$I z9tBO)9$snmy@xswk@XPe-JNMBVOx2_WWr3{r@Ij{X)xCt5%_k1D!PTc|^VY z(m2gY-_1Ub;B{qcKBPDuOD>NSbjZ_x#VKlTRV)#KzW&!?hBoMx?^efWPE zC*NL`D>@RUPjPw?{e{?P z;_;wIFRfa_`>nfr$$8L;ynMpEe!=u)#CtwT&Vx?m6%x*$z4tn`?580XpV+Xus~_`!$=3XCq#Q1-idtdx8%2I&B@- z>vor%#93$ZTrzZG-f0(V&^opDvOL5qbwyqe(8X<7X7Bf4U)H6(4|a1^e&Ws2)fK7or!naPkw&|I`n4^gmW5AM-?H?l#_A}bnq`uI7b+HwK#FI8ys={0-YHD z(Zc=Jj%i90&ntNbyF}zcC-RaD=irL=btPW64ssrJ9RK+H58W=?=YMEd(Z81`UWbBm z9(284oV7MB-1ZgX*FG)J>$`QUOuQ9M%9 z@Ht8mtMqxBF<;Z1^;cqj6K8ymQi+At`r7{TX<*8j1&ea~;-ilD4f$2)h8yuiK11i$JGXt_>zO0H{d{}e$LjXd`&W*SqD{ z%jgi}x`JMYuh93Et;Tg^H~sx%SL?rbTcejzMV8umZuNC;)YUC^d#w%hYqzj@_P<9p z*01Bz&bMF3HPo-IS@r8ELm!u?hP}N-H8Eaq@U6`aUQzx1tVVrUjqA7;`ro6Tb+YT@ zEXM1zY_r=F*T&$Lo2%Pf*6=rLly2`w-R`L1miqe}T(@HS`^6d8QO30uCHyh2-HPk4 zj|%h9ImWdW<+~u=K3tbD;%Ag`Mj2(4){@5GjWRBaZf_K>@%Oa4pVm^w>y0wbD5H$h zTH5%#QN|f%6n>w=Vz6XpV#VnECzOKRj&bT)6GRC-$GyIC%sQ+Ett+%%bqjcV* zU;k@-v97V_AMRI(`vS>b^^W6gh9BqJ{`IeZJHaP!(&zLA4DILRzyO1O)or7%z27n& zU#vy!?M91|@5@9OpVK!oYysat#W*M%U+gx<4=|Nn%DYt3{e(RuXZ0X^^nTn!OWBFX zmj$VNQGeUEv_2mZ*GbS}oa#D^IoaO!A> zI^Fw!*85J=cpf~}FPL~!HpzL=VSgL;ogTpRxYSJt67QnPx&%7WzT3ic+(#!2C0TDmQW*aaS*q*9Uase6_5wF8NSz67ig`%X!d=@k{vm;gjP(f4O^+Ux}CZqC9@k z>GAW>aBXXcHqPjv-@gcF)~?&cd6He;PVxR`_Poe^k2pRZ)i{$zJs{4Z*=n2?Z~r1r z%waXo!tkfWS@&3t)2;3c;@r=c(Vi+|yGbn0e_ne{oOb!tI46g_Cr*PZY8sd0+0PEDLuZ`A7Rc(wGzDg8{1v;RaU;`Wo%6vtf z0|Vq7=)ggH1E<7?qQtRIkaM6jgK-ZT<7Mb=`?GLE(FsY@D#bp2S(U zSIw`Mo%#^xcL%>$5phSoZ>(%3PT+Pmd&1v@5T}6Rc$zhO5OLPzP_w5*r(wi7o>z_2 z`uRxW463Zg*_(VUagLdc@2DTMc2Q%)1md_}SDXJdsxpN*EB;dB-1=!6an3k779WiF z$iuc?Gl}z?$vg@=v-pfFHkUX@nmc9>;tqRWpPo;g)UDMx?FTLsG9oms!}^yzuxJj^fWpr0_~xOiM8&Yb#=`2{Zg`sU&d;sl&f+mD)?FP1pV?x}I^ z_PIrzj%C#RdU^E@ajNuF)kyI4+BMm}28Q;oJ|PR*|(t-m79pjK*}A#aKjXKg(-PUEE|i4)mgjq`2Ovc&nSuVWnK z6Y{X^dpF{2A0g*JXO=%HzOP7}LW=t&@8hZxC(OZlChUV>9X8Y?&T4-(zwUK%C(iW{ 
zIY(?4>4kk8=X?!_GhwWp1D#o%vs`OToZ^acJ~yKoaq`ZQ+avmwO7Lsq<5t8exlGQ1 z4u0Vp^|d#k9dT0kRO5s>c@pQcvtu0i340>ibS6$$#rraze|IBJUXyyq`y)QMhCMUa zb|+5oLb+einQs+=&X|2_9OrZWiQ}g5z0$UI+{yw2 ziIZ!ynmvbp9YUP)LmlJb`@B)_180mNPPWl<4s>SqKCaOy;_Qx)a}vv+ydTFBr}h#x z&htGJiIa4l8t2)nsl+L>O^x&Y$_V0|TkRM}9Csf1&L+;OnQ{(vX7$ye;kU#|rr6#q zrCdau{3iSSiPig`ZzG9wae>^g#5k6?UBtPtUu|9RO~E6?DcD?Xox8Zz&&0V+G(9?XNfbSuk9q8?U{Dt$M`-}=*;SU+Mnl%ll`2WBie&}f<0c_E)l0&ce!8C znc3sC=_+x2@5wo$J-CKFZx7rcPTO{B^>y{>Z^Wr;oLd*;jQ2P5Yf6n<#OadMu{^}S z68wrhc!xM&HuG~C~XU-93kO`L<3h|^o~9MMa^D#U4~xIea{U=8AAR-D(> zx>B1sj?U-NU!&hxKC2#al9|lsp+h^xH8?)Q8WE?j$$TCrAlT-2hk53^V z;>=&KR$u*6`w{1i;(gC?>HLXP%|UxGE+Wp6Cjy95#H77LXBOwIW39wlyhtrRwd#Zt z=WhqsxnldO@qQ3-25oW79{7p)EZ9GcII~U8M?q&6pT4WZh%@PsWA=!CtqvGNoaKt= z1}8L_K%CJ;J}HR(5?GmFofRMUx5Sg}7G|MN`ZJXPdOoI95|Yd1L- zAF;lUuUbHyvkvB=$Z61P330sVs@YSi&~oDZv{;RktoIt?lu`W=BhL^= z^LNZIkyCcVdE#vSQ7z8TO8ico!)qPmi1)3!Y`;yMGv4xjE$9#*#2xW*nQ)glJ|pEE z=*&2U&p##3$aHeQM2;Bey%k;%C)rE2eZMM=UK1y>gxoK@Km5Wq;`7e;9dSmDSL1~I z_>njrW7Rl=GbXE*aNp*)9P&8B4>P~qd#5B$q^BCk<6Iiz9G<1d$=56carXYC##waL zi8!^Ns&S4C%toC3Ssd#(kl)CkthsU#=WuB`2RgI*+PyO`an38A16eV)AaRa#lG`KZ z57uMwtKHbb#5q<#&VkO%ugyh^6UQm7oFm$UYuHn6e<|YpYI6S_Ix~A#b#x)l<(g{t ztbA9FIAc1fal+12BF?J-HIB=QYQ#yR$m!Ci7I6alsM&Kzt4o~y4&J9m{h{9HY;8cC z0aF~a2kSa;miF-=PEnKnL+H%%r+m@o#M$y0 zUlXUf;(8}@lg`BXW0#seFI~KdGca0>lkr)1;`G>|#>v0ECvgTTo+l|ixDRn=EAA&; z^A04A%Lz5VTGtLH&MC#`(IzGxNSuR;^}g%b5aO&?wCBLi5ya{1;CUO2XXrPs4;n?B z2`2k~(3$lceX5Nk&RhrgZNz?K%Z?eudE(%CB+NhH*OQTRh_iLLd>#dznP1*dza>t& za5)ElfP-}%?D5{Wh&Zm_$T`q~11>nND@GEhaDr;HhPeXI6bR5W{=pf?&y_;IIm;m<0*9L zf3U7Yzq@^LkL2EM9BobHPIqAzFFBF?1eYW5T=QI!8!q)nuAUg=dsCoe(1o#I0Q~W@LA$SnVf@#4ji--aQ3)d zBu>3aj>SjhWclj~ah}Xn;~ZOkgE)N^+t-=yvBVjxc;9nU$=k%q;ox|R`a_)W{dt!- z$xQC+L1$K9LpD4hPAkRxGBdw@Oq_KN-j@;Uee1Ai#M!QRPIzUlm&AGKU_U0>6ZZZM zanhT_89K8#w>tQNIE@ad<#VMmN$Vusj~a2rF%HIicmN9avx3Pz&`lpeytX9o}28?LkGVw zKLcm=mb%2LY4Thobl|`~aQ>RykT{ov)Z+83dK2PQbMV|3`XkuW_s{0UDL7ni4|HaJ zrC(wpPWP#D4&q_P8QQ)bapo^jSE~`2&fwR&jsB_w7*PRNSg&&)(M~iPOxaA4@FGXPw6oCr?i`d+J^oPn-yMHO`rl zlZmsur5a~+iD|?sX!7}oiN(k3;tb+s3stix`_wtaS*G}$cyPV>#ObcM9z0QTF>#8R ztd|pu&#D)Z#95%&kGOLg;2Pp&ja1`&Uupw!PATpicfP-wI6o=wW6xc; zl{gWK>(lf7b`mFtgX<>b0s8rro%a$a(q!EPomoFWs?I^;+)zAE+pqc|;shx6s~O(? 
zM4V)b`EzaO3E~t|oNssz_?b8s#rejTO6Q2P!Q{S!SYMdGqTbuQx=5U5Kg!z|bY}HF z@ZMG83^^|6i1uJ!1$*K){X(2ACVoL@X3tOkeEn>YN z?WwsZ+>d(T;C)!+CGzK9f#k#~Z?fKj4*87Fbp>bG%T&xUnLk4Z4!Gbf+n$a%J|_1s zpaVyYPm?*Bh~vFpt=@-q%}SgaiuE<>tDMB?XR<#AKg|4k7@LPU9~@j?U_Tmh-mtX* zagOeg#|JvtgMJpAs{_6w&c*F=4s_taPjG5hDMp+h6vyRSg-a2q%3--Zcz-i{=HDwz zoMsO0har+BmlVl2;C>o3KB`_n#&cKCcow{2IL2o0aX~ z{|erQ$RLM!(22a`i%Jvky9)z|S1-A~Z!6jdo%sE+r#$%ka)VnBA>I>T{rsoMgHGf{ zEaC4%PCa=z@nR>+dC-Zx<8z9Tzo)N-5pUf&IS)GRB!B;I+bsP3#V0F|Azpx!JbuuL z_LUL7&)0eOc;aPiBIiLT@|?@Czhk#fCf@5&avpRdFJC+MH+1$i;zj$&dC-aZ*F6i@ z@93U0iMOqjoCjTWH?EJ^0qpOePv;OXl?e|z{(fo?Eh5g9@};-;eBvb=s@o@i4k~o$ zmr6I_cJb@AMa0>$PR@Z2?SI#%8f1@uXe4poT#&~LIV&fDPNagNxb34 z_x+0P6}ka|zGa+!f_uR})HC|0QeD>&Z>jM)I3f=^F+a+6;(UAhem(KB@00rr9rL`k zB2zd&iXPldytU_ap6&bW?LWrn2Sev}JKFv|+?FPK*;?-|H29xdXun$I@IpRGw;Xxxn0mDiFefYA9x-Hsw&=@zMi%Hwr-tUHg zmoSelT9$KFi5p(wBXLVrmirBz==W2hAMms!tCw)SQNER&2OZb1MfkXFRW6_qGV}g3HS56S}lz^`B4vCd)x_n|;45@m87SN1UM(?Q12B+wC@%C*I?H z^7ug~@_xC`@mp85D)CMqldm746L}lka(`TMY7OGKHj(q7LwAGtc@^aG?0kwk#M?bu z&V$Zl%)?wBwLT^RFUxGszcrWZ5%0oUIS)E)y{&F64A`kOOjE_ZXcOqWMYB>)&j+;e`o5^`{WqETrMmrPd70{s`4hro*G>0E?PDjh_f=-NEjId6L>E4@o-e=`J=(s(2YTbqXnTwSI zh!>Gr&o^;i0i9d;Mtht%e%{)VnmiBnUK&K)sm8vPxXy*n?c!ehI`?I!wttrn(X`+| z=McYuo-T;%0b$>8w-!SDt99l5Ds*hGhc@A7&Yw!H2NJK#V0pWPPPfZbTRENQX~~xl zAx`jac|4)ZnYK~pKYCpr&h}X})F0YQjwU0BJL#?5U+6fGJhZ4ZJkEKp9z{H#O!~Mf z<~MYxhZ~{X|4nW>jyNMP$?bxU^UR&~e;6HCNvfG@fTo{SEQf z8lM{^<~MXv724VJkJm9C+M8NDzk1^xLEO&wbUVfPL5KJ~4P^WL3eP6a0An2~t`neh zZsQj^Aii;cmqS?Brz!R=@rszthoIy5d1$W3xqe?>Tu3}?PI;a{2fJRb<9SA!`b&xP zi;17masFDgGMRYZ_w>i*#4FlKKJSB${j_K!o-*&3iQf@#{1`b8I{$vwpx{vFK=_-j zDf^pk;X2|SnkeT%=RSt#8HI%Xt^%H$h&R^wd=)WIpcCVV`7hep?fzSc_p5Q9PUJxs z5a1gS6yjGx_t#Uac$4#_P?{aYTij0fS8Qj{MgINrxvifU*OQ~_@_g;qq}{|V(_bD( z=w5`cjd$IBfNp2iz>Sfe0y^I*Hm(~$_ zX&I@PPDwJ^Ums_bF-i67sAPH>kz6mWDfH4Um0mif*2_4fj7g(kN2S%vh;({s$)J~R z8THaBlU~LdWsH-49hF%xty%Tbl1(q&vg@T&4!w*s%9xz`bwqBxwC2%EOJ2Qn%cqx4 z`Smi+D5DDM*AdQoX)UCemap{Et*~A?717I>qIwxsOfMsf>!r1XURp}(rCTYzj4Q2| zF=g~Js;pi{xag(TRWB`Wdg)YNFXN0drhSdf!#?;fVqw4FWwV_^G8tJ8zhhD}RWmHrBI-;3gTAS;o zTT8ukYNeMk7QKvWt(OsP^wQE!FWuVfWn2fnjQLtGBRcA(wUb`DbC{6n|T`}ERzzh1@}CHnI#!hG}hSJBjOG&7#l7W+%+#PKHYSKO~Px^kF!>5OxdA`d!r zPr7{`nx7NjM_Yg481Zs%(ECYoJ^~&5-0jWd*86Wy5$BJ2^8OY&w#%X&74FApSr9|K zi;Lwv=)|}+6z&)H?|p%ImnX=1(A5nH2}}k1yH>omjuOS8=|? 
zw!25X=+g4~fKKG)Si$q*zdRli@0FXJ2OazCp@j?iQle)Z@$weZd1AYO4(;O4w!AOv zdE*&zN@{YupbP)`gnd1W_PVwsuQzY`y(HdqW1cSB1s&|dJ~Hw{Yx^Uk;>tDo?G!1J1N9&{qlw<52vQ@N)iUgeT<9(3Y* z^LAdI56&-_o_N1ykn^BJ{A}OWn^e1EKVxO93(e`(GuwPti!^G`qI;4$zHp2Guhl5M zg|#jH*D2dyZ2KU#SGx8N>gVU$$HzCQyQ^3B zySrM4g!T>!tl(14%{9b7)UT}7$G5LfFTW7i5MO`m5Z91UpU@CjukQXnA$~pLmxnIC zx}a=@a&Bb@SFP+-xnkJ>|G)u*%k~N!;1Uv~eX{%Gh>Ujs@7Mwr|ARc+F6_hJ|H+Bs zly9|sT@D@Mg#52CnA=l@(%FcYuA6-Q3?1ykejxI{My`Uy$!Kz)9XipjkJWkK@8N*2 zh&O(azP~26OXy%%q_AIFcx+MP99}Qqr-M$9{y*{mLAcM9bbSfpwKX~C1s(fo(WYk&8Zki1@@6Z39cA)YT>cWFet+o@$hTUF2uWfTVCJLiQ{XGu&*{|gBS4zRgl*UbOAo0{saA- zdmw)Phq<1|7V{8FSr09?Ew_(dv7yAf z;;Hk*aS}Sj%{z+o?P#|l#5vhnZWnZsR};2di?(0r|3<_OCti$6+@RzB%A#4_*}nRz zM-lJ)@^bs2hsOmPxCe{e3!jn)U1XFwn=9E59XW4 zjrnH9=edX2e*Zt2ZwB}c9O4x}_v{|v=W6?RNNA!HPmHHUi%qu2L7aDbzrRBFVGn+* zzwVkbKTT;ozbUo@=xnpyvXv_u_YFO?*Q2ghN_gIQcb#bz|Ha07Nz7a5khjT%bG%Pp z%pgwgAbER-?pT#G)bF;M%Xt>`^*rKr87AjJC+1lvp+EAjy^wfE|CaaT&>_#@Z?v$F z)BDI$;>|ai|3D}5(C?xCAKhO;yg4i7_Cd!y53R>h?%#5kUr)Rzqja8Fuh7BIicbsE zczS2$X5tjxCZD%M$97q?hg-Q`14?csUf!W{9&{qFaCe^H4av8IcsCZxdC=L~zU@c% z*HarR>_b%Z-%Y%3S@gJx^Bm|>={Sx1;i}0u2=`eorP)v1@7wC!a6_*d_gSEG8$Qt9 zpT%8CVJ~CmCDZ%2Z}l?js{VWT%zA0PlidDy?-^lwX}r#wRsVaeQATIeucNN(rRAQk zi%yx+u5-$v|K91o!Aqt8KBl~}Pdz26&d+J+Q|o_^Hr_AJxOPvY|K2^9{yOLRy4?}R z?_-Tpo2v7yvHI)6)9O0Q1O5G-bL+o%>!Rze5B1+`dGy~~jWWXExIHp-=?sqXzE3{dmY`pHV{(GzOI=6g=?ulMnjWW(CBNiHY&{Z$BEIKFBD4p^f`Z%2% z_tdz4X7G$M!uWeQUf)gUx&5ubE)II*I>zwJ?YY4*%9sNBwdINaeo+P|X0gt5FR1_B zIlFGB(^KPhFZAELy)<}68D*5-IrP^%JL`XsdS$%swQ+5faVX#DzqjNx;`-Kj{Y3pb z&iH+7F5~Yf^?G*8tzWyn(_inL$N0N(9pz=j+ql*W>92E|VdSrI9ra%SecUr6?#8v{ zZ~fZMTjw}E*RQpF`um{&9aEh9%kCYcso&V+FCQ16`yqozlAk<&hz4(qFb=l5dWd+( zjOU)j`2%#;Jo#yy&Y7<~jSpALA0=MSb8`EjOIy2kZRZBgwRHPDwAw4#-w)eQ5O0FX zIUMNV=l40vkzJP#{7jr1Y2@<<=(=pW^YP8$J0HOt)0M}Wr6tZ0FU;im3+T|TgBMen z{ruhQD)BBAmD>kh!f`N(=B-6$b)Nb${s;4iw$pwiUbscC2b?bpx4k(22Re*5Q(Dci z&o5!`zvk6(&-Hy=Z)3tK%&RTNyn5b)Eemao|H-`CzMbo3+rka@abPdkW(VT(?9wfJ zTzFpXp;fGMnda4A3vW~WT5Z(l5#oFaI=x;!HSglQe$KS^9&yq))j8t)0Xkb$o#Owq zwLe>le90-yOSZ*4AZ}cF`8)?Yo{v~GkA0kfo_GEtp4V784>~@V;-M`T?7Q&j3Gpi2 zlE)7^_?f|<+gY!e=fug`Qr@3H7aubF4|s!av!7K?y(QklMsgl>?59P0o{H})C%y8K zc##|Aaf2?nXuNsB#Xi+*hmt%`e^oI>gM{~k0_Mnh&_$P7artA|KY7yZC!VkWoHirz zR{y1cf4SJdLl;=GM9Y>XwO0RX_s)Oo?cUjF_wIU}v7R0orW1WeFSNhGmu>f6wtGBY z1AKymeTJY+bSZ5&5beI<`AqgWaJ#o?_l5n3@%yq+ynf6sj~8?a`{AUTMXQ^K_aFW5 zWG7z!^R~$9?c4qjW1bHkKUZhbp7iDR@WbI;#H*A?ZXa~^_lW-=+QSdR^ZOS@79d`( z>2e-)>~EBItP|g#KGD1|@s?%PdHSwUydj3a&_(Zv((?+28l_v8boO~PejR?cr+qxf zb%e>^Ba5>RuZRCBg>!>Vhn66FH=U5z4|F1LU1zSJfyK%YZ%>e%2OaY~v?mKs(|$}V zS2yC#7_RfgaR@s2={}kJmH8nRiPNl)+%D+YE{ir?xPSL-Nmb$%Y%J$N2cGR!Nws^Q z#$zo!Qm_9Ay&uMY9r~4{M!&M^p<8wvf`}&32{%IZZ>EDj^F8|Cc zW^Jo(kB8X5Sci_KdBxGkH7RaRrR8yhF5cyE>!r$N^m_Hs8tmtKJ)ELG@w%nZd18J* z2hI=Gc)$Aj;)cZ8_>JwA`aO92Ka6$@9r7$?Am{(?c`b<3Cz;$o=ZZ+?J z=b6=!c*B>-dC-aR>y(|>J=d~yAzq%davpTN?(x(L59a4heQtImUfv0M|1QpFp}TVM zl6}3{FTP<1XnvpS`MmJlX}4KE#J!tVKJS4}te5@5=g6f0-H&*qP0klUC+-)!Oyj&x zSJa<)n?}g(gHEiMw!*r5d&2dXix zUax6#9&}>-!o4fe`n!9}fyDa|Bj-WK_n-fLAGXh)k;E%?P0o|whxOF*3H#c+laD6e z%DZ~~ig^v4+sZBWaVzG4Uh4gqhj!x__g~=y#u2w^3f)eze}+!ycx%52&x_rfFp)UR z%j+E5cxeAI`e*3e-Ve3Up9Z|sOTS>wws=cm2q8@<$iF z&Oyc_bk2<`{^CA+HXcK)%(J;4|6{3H#Le4LjnByNhc316p86mD(x?36+5Y?=qF!=6 znoIm<3(Vs`(8=;(O$ROy_QWh8UV^@VOV*pv!A_*(ZVk8}ldZsF;+3*F9}hZkN{aQS zOQ$6e=lhe&pE5r{r#LD7paYz*-m{hyXKohryfbtG+mdM9vUqN;ciUw8n0PZ9nR(F3 zbFR+K;dPyBFMmqBN~O&_=w$g>vjErQuO_S`UYDX~9&}ufyDhAfNBQ2q!so<`EpO&Q z$LYguUHggsyYt0O#H-fM%!5vjM@5NtOJeb@#CzP*%!5w)H-p$uusGum;{8LEYS{66+pyTx7vZ_4bcKDEA z&k?WhU2{1K9sKDM>nfV|y-1vMf0*kX=(wDEexIYe-c{nQeo6UL*1y>281hp;e>lN? 
zYUgdvZ@c&XO5EBu<*7^Sc%P%&nk~*X%y!L3yc@5W;{!Tb-uDRN^!!=k1L8%MH1nX7 z`&pBUaeliJ@`QM6Z1#CWC(msOn$EnI-n7l19akQ-*xX)*PUg3JJ=niv7N;j(%b(3W z=p-*v)8|Xe_8oLwo}{b` z6#da7GmcB`_)`%0h7F2fHxXF9~ENN z5&PwHHK|AZZ$2{fq5CI3>_GcB<-iK!kJkN=|6BW?;}32^{27 z%slA+$-aUGM-s1CIyIUl>kG4gpFdY*>ftwum+zLcFAf!#9>2u7BJm3g_{VD!CXe>l z8$Rv2KTywu-cirvwHvThJx|u3om15Nq>t3|IDFP_{3q(UW2*X|N4wt7)bm9BIpAIW zeuH`*qCb1J>)fKAThr9{+}ibRQ_mCdS-aju^*m_0`hL82J-gL&Ul;v*+D+E)o%_{$ z=M44U*;TnA2S=#zIiTJr>GzI!_1@C&y+5e;9{oH~y9o!B-g8*L@2+&dZkj(+xz1zi zy;pzs_E66QwBC18y?4)2&l9xk{Z{e4XVrUWPxapWx^jK`d!F;^eb8+6+|x_Bal4h? zc}cxb*6*Ej)O&}1@4Kqr$LZ%u+D+W0fB%Mh@0hFk+6~d~J-5{RMEyKqp8B4p-#hQB z_aXYZSGx&cE57f(dT+g_bZ+ei>G$48>V1NK?$d7aPNnx)1^wwgXui_NYd226ccxSC zll61w0`)zIe(%em-pA?ZN!m?J)c((+-a8g5okzPN`n@NIdY`DD2fVMom)Lli8t2cg z-aEt9bMFAo1CEu>xj-g7{G_Ks9;QgRW$KIEKoecFvXuRaI-qFm1< zMzeR zZSMp1y`YE6P10`MU+S~-q57T`tH0k+xt_O_8!%eAPKV-pKR@E<`x4c2-y_AbLW}!< z-xH$TxP>A9bQrQ zMycm+?RvJWcu2tKBHG_?sP83fH%PnAH`Vhv?Rxib^{*fDyrrHeYBylC`W&KNuXe35 z>bXOI&#gZvXxEvj?1|HEl6Jk?PM>z2U#ahTwCmNbPrJ??`n`6&+I8$y&z;s$f4C>= z&jDZS@9XCYyVPfIM)ke;sfGRVWmj&j?l*zTjWc>SPPR8tZY0EJ_0N|ky`;`N@*K_S z|LmEo_HTH8R-c{cmFupizUMrrK6`WK@_#?Rv3`C<>kFyp&RXiZ_l(x(QO|=a>EF|w zfPCtCz@{z!d=@uc*%klmS%3Ign&UjJe|K+9e+M`q*34ZT-N+~x%~Eded@FCj&kGE>+fAxpPfb2?UltR_1p`tLp}E{RQ7q^QsI^`M!8A(HAnl$S;_lH>bWV-)(cfRb9}u&e|z(( z?>V&{-r|ZI7ohbIlpUUO+CKf<`MY}VE~%bdIn?)kkJR%3?VkjfdhT7X>`c^lCT?%+ zx5Iit>kzM%pQw0#tT;aH#_R8SHQ!lE{cfUu?$mbrDyruJS+xCsDo#=vZD$Gf+&NCU z@nzNXpmOS5+$5b(e4|u2I*Mrf{!$#z@e}@Z;`4R$*DI>r2)53gXWJ5D5U#6 za{dB3=j1p2JRUC#w#uGj?vs-55_fZY)t{1F=#o0=^WnsK7N^cKx8~0?h`ZirN4J$&p8b1#{rkjg zaN6u&=u+2_rvAadI7bQgeKF=k;w2w8^Pp2S@B-3)+?3bRG%LT1c!@_9uResjp9@{c zdbK{rt=%B)I<@Q2Zh&?z?fO2`>uR){q}@dAdbOLN-FWTBY1gCO5be6P8>C&Qb{*Oc z(5|IjpI86Bc9XQ5s9mpi6SNzz-8k)fv>T#bw|0ZH>(s78y8+s@wCh`^e_y*v+D+81 zSGx(?jn{6Rc0Jk+(XLy&LE3d{*P-12?ONLP&fDg%|4QfqOr)2@XGYwO1%Cy=V?HPY>W8Cc>v(mE3$)lCEJ_rgHFcB zs!Y6}=&kCzi8sB6nFk%)=duQW&g+vao!w8o&F9oSgdBH=PWjVieJA!g9l3vyI8jr~ z9Ow{Ue~5YVi`|llvn*V3Q0YJDU>ELBK|Geeb(J{B z+nVRIq2qXTSvyPf{=pfgeA3ZNGbVzh)RCurG;o%X3QqM7o36G8m?-Tv6?2SOUo?5TMf%iqYtZjes z{AtTIzf*YajZ^E&Wq3h{bQ*Gm)5E-+_lVQIg_#4LU3%D-^8s-xL@SO=r*`S#gqPPN z6#V58@!tN)Y!`H}YnV81XMT38#k1=;#`G|+Lx+yjsoM%2#^H6Y$P2`KQSWn={)CS2 z%L=i!PviRwGc-w0ydCUUiy5i=kI@-F*g(O2MjW= z>yY<9Wmv%RQEgu~;#|=88Ornq9pa<5D9=XE&rO_VR+_j|0?q!0PS*1`va+4Ex|Jo~tVw1bbke?u?Jm$hyj+Vb5U+81b38-G z;pny;;vT{d1*;J6Q5!Q4I?2m-gx6<3_@Nr{{_1b$LFZd?)}P;yZgz@$O-p79B;NLD zGY>jxpIe+`wy0tq;!XS7%!5v@4=yJ5m;df*K)mDn++ta7K__|E3a)Jr#9y~ zKqvisU#v&1H?0}*n%msl3thn!U}}wk_(1;}`6GCzi+D5YnA=Uz$$ohmv48Vui8jRR zwZj}A&`JOPTA0uM$Q9L&c=_*}dC)mJs^*09uggj=?qeSIc?aT+tZ!~_LWgjxF81R_ zmU@ji)7~+M8+6j2`NX=hPbYOH-e=p)Jm@5^tynkzZjIN87kJ;ygHFaP%0r|R&xH`; zHP3ABKR_qTQJl{T-mztUi5F7H%!5vrqaotF>jgCi5U+kIGY>irzm)Up#67bUHw+@) z8_g9@*5lA2y#Tk-ywU5)1u=d8ZxNOGWi zED6cCzmyP^25jtHt z)711Qy#GqN8a(V@JO%6Ta_0BtAN&0+)0J1;fBV;g)f7I#-OS+)9j61gmF+n9%Q9zJ zPrOk!`&ytonJ#*0R7_Moq=#U!&NFw~jl??_Wo~yuC(mDa63pf5=EN<;`ys^4gHHPQ zrr3uUP~j`${b_TL0CeD~be8=0IVlN4bbV$nzYgehmM-0xZK;g^N;-?`6FDTNs{jB0 zQX!UN2gHEX?Z?)&(NQVw=l%agHEjh&N=2Y zFR9o~;*CjX=0O+qs6VwAj);Et-2=CXx9vA`JU|Eks&ej`^Pm!fRXG=ua%WKLU!4E{ zPtR?is>`|LHSheC`sTk<&czOiO*vSys&aY_j|%m8QqIr#$2)^k?Lod7yY99>JY;!? 
za|Q9e3+4WxaBKF&9B$BYyarqCMSJG@xj%_l=z!wMc06=(>E`>x+o4^gKa{VqbNi~l zh}-_C*-q%V9I(=^7V9L!PFb%!JC8B+xZ>4K4aC$xy-otU|6LcM);s*azAnNO)Xcap z;*FeK&L*`twx*RNTgfq30sQ#@I2Lx*&pPMilg zF)1@~R?afp1)WN)e~;Vt7WGN4?AeJo)aG6Q=s4fFEu8OJ z^H5h#&P%*`4b430xcqTj$HchugOUY_SGJ9r2OWo>%ldpA&(r<9s0i_j_E$U^uh6M@ zby;tUbpk;X9K_i<+suIu>0{ymuGd3~mLX1en|q<5WB<7=tOG%OHk(?GcpGfaeTGiP zXQkOZ-qO5eCE~pvYz}|uBro?$_HRqClX!J)`rFXKzt8oPlWwVc-K)=!b*OKmpS)tc znz>mxreIv^oBwRRKBRB>;FRkCtM(ewx0nB4|5%wLeunj~?)Sf}$8rA@zE`+-AcfPT z^5$@dj>Fw$b#7Lf%CpzKb%^(^E;nSlhYsoWeor1Z%XqUsal$g0?Sf9l%HQSLj`dt_ zFMYW&@y^?%FX-5xZfk%jFVb&qO1z#6&Gtbj?fbC_r`O~0Er@sX8#50&o{x7~vlnqY z`K7I`h_`&7;>mb`4)K6^M*2t}_$qNq+nhH5oea0ciyRL_n|2_c<2$oIp+mX-T)vEJ zs`6!`d7ON?&X;=+?J4))%$MQ4B0_tI{qtZs!gJi~o&5gie3^0&*F#PZtI~C$aO(7# zIsTwS{qci`*JTV?+MPIooy;8QkPe#9tVQ|y&fC3+^G-T*yg?_^L18f;IeA+j;{E!_ z++KlB=AR{{IbGFCH-LDths`|bq`rQ8h!c}@tkqybO@bHHyhS*y16rXEb->pj1xnL zahT`IqxdwcJhBR@pNv=S1+)_?ex3R+%UXE!Qp&0K|G9R;;K;sx28UMd(`#^O%B{gE z{|t-n{S;HM4)85_s$K&mvyivt}N2ol*u|qWeWgq5OTZ1Lymy z+vXCl>Wk*`7dnI+#&uy=`56m{6I{rg?xACUx-7&Cc;01;iPyBC*)Hhp{5SK^65{0k zQ-zmoXGs5*EyMY+ee_4fX=rnf7If@Cw^b-Jr`PO-J|SMOE9Q8FPV$y~mXGScUbQ|a zUK5-83cB#Yp2(<}h^OcD4HN6O2KL-SybTx4<5JM2mKmvkNFVRTbN}f{hOdY>-DT!M zSMA3puQlo1@*VJs{KfZgRQO^i@ordV9(1XtUFv_}mFvOwl`go4czbi0dC(<9_w$E8 zcqha-^}<8@i5J}1%!4j9>{9>0+cBvo#qa*k-x05MOEV8TnO<-{3G&yiNlC;D%4+68 zhjuRP!?+4~V@4e&-k2_C9(2+^pE&oc}cke}^CHYuH!gqZ7mnsBX@0 z&`JB+y~gRKf3-8jo7&RMgHH0||K#%EaN%>rTW)jz7<4i|GI_Wia&O5+;?36g+{<;G z(8=(dB-WX}(D^FyCKNXN7do^xLn1& z2hvTCWf_UrZK#_+>wppST(Go5K$}_HWAhVu!dM zzRDHA&mSBtPrN~06)$ys*8ivH>7Yx>_u{9GL)1On zsqba^ih3IH+$&FI;x}1fwii0CPg3?NigEsfO{x)ZNwVU}c!4f8UeZ`8{ucKqrcDSW z&b=+{Z@c>w&p%P+inMc=XcvBTtS<3u=zT-dPUw(+a*6i-!6OZcb3ND`e$dH$=M?!l z!A9NgUZmYg%&-{GFMZBN}W*&6Xz97Mi=-!5S zcZZpI&@s`1gU>UtDB*2=UUtV&*~jYL!mS zUVHWAi;6eE+91{!RIbsNc>60TUg|v@{y$xALFZYk=GT2QRQWYiPgBWoNEY`67A!h| zekZz-*>32bBgI^vGA)Z_3XIXwj=e&OBcItUE zTauI;Rml-PB<6Jo;_aXqe}2#_iui+Oo9%^;?R8s$qMg&=>saEAJz(ZRCwUEuaQ)h% z)JWnDd%-;31Dz~?Fm8Z+^hy1v>#FK;okeb7msyBO!o%{302Xm!Y8?A74F~M!e(in(c!Q{#E0| z&#Z?HaH?@)kItw`>YErRuJ=gwAg9Le?UwrHKQ*5`*k6jJ3`nKiN7|=XO4bOA8WI}O zD>f!P;@{dAAKGA+KOA_xIM}M)mG{N0U825sQhPH{uSb#XYv`OUANkv@9Bz}Jj#FD! 
zX?1>6fc}Pd5pF*gRnNBvBogy48YW1pBvvNfEkk}EG`wxjteb_gm zN_1qE>Z#vHKB?`VZSbGlYAE`{!|%|IX#(tySBJSE{_32VH7fOZ|g= zL(+4<^4l|s#2aUG&meTXug-0a&ceLpJ-diE#b$gGI+=c}^ycz=!1lewtKG_+U!haJ z{oB98#r}wx6Ws@9ZbduL=D33Qa@A4h-mX9f5CEkE!GY>k+8zS0EH3y$3-sx**9(0mdI|JvBeVH#2?@W60K33>te7HJreB@kz zm3YA=%sl9lJcXYbXL4H)CNnRz`wil4x49<^I+>m^?u+to^GmmgSKQ`a2k2z@y;GIz zjb>Tz67RO&pDgQJ=s5j7e^11m?N5l8&`gD&Ea&CD2}dh(K6`TIg_h5b6A!gn_XeG; zf8Ge>a{kYi8HiWr7gbJ4yP#9;)ugBGAcG%Gk-e+-iR>FzwQXeI*oe@mW^J1^?^tzM*pOlVR+(pH%z(%tHL6r| zR*epi39IZ0?K3E4!DDn)cgr?a{YxIhhxGsF)eL{P-*%VI?DsF)r^-M3|MpM(H_AUw z|MuzU_GvuU{;|#+zx|)|k0(4LB64_@ltyQCs>fr(`}M0F*tcJ`zSV2@PHlwNs#2|r zGvx=7-gEe}{LlXIZd3m7q}IqOeh{nTvmfWo=IQHpDTa(^aw{B`{SYZ$E9;+eg8#&M0&!84D?j5 z99XMPwQBupSM&D^ssvW4p{)7aKTpbJGx+D3^yR3e`gd3_oXGx3dtc?3<%<^1{V)1w zM4#T30|UcCd)EpKP4!DPoxGmLORgpV;E%XfsU8nNwHP_i~fihH8`|# zt(v`ShxV>fH`OD7Rq9l!mhyv0UzO`+|DWT-ic|h@JCq0X|51MZuKf{TdD6PnSO2Aa z5*!*^xmsY&z*@EY)bwYQDo%fPf9j9M2iPBIXCHZu`_)xFIsE=bzq*;tdMD^=St+GW zN+^OCeTwVp`x|l-FSE_LUC?no=(aXx=lUi?j{L-1pJ?t^Kqq-P=NIvN$5n`U%l`Hgt_pUo&R&8zlRh%H|Dj`lrp(KU`{zka6jx)!>Yu) z+rZ3&j>FAuwVclB?e#0wiT9H}r$e^mpp)&)z%RI6IQ{Ed#B2MS**@r`ef7mUuR5Q- zOuWPG%{=Hh{kp9w*EsxUhr5WETi?qr{R-pB8MwA)dB*R0L;U+~ht6izYYR5;1> z3LVnx8F60aj8+4Pv#x_VyrGln0pq5yf5U@8#M^0e-T-vWOQ{d4a=kpTVkGgZ4>kJ} zI{0&jm?v)CCyF>_cbNSN9oyx$ik;(px&B-%@eXD-*Duh?bdU9V2)7GOM-nefH8T%7 zX&;`0cl!G`iRZSNFNO}h=kig=GL?_8zewbx_jEp5v!>^N^xUOq^U=Sy<1aNA{hg2M zy%6X3Kj$O2HB;oHLdoiTXh*Jj+Z^uDsbBefJT%R5K0o(lhj%F4N87CDfX>ytL)(^} zn(Oi8VC#T5uViSR>BQ?hM5S9fe*|5UwZ^{=E9g`|S~uKHtUJjuZx(So6*uPx=#V~; zzix>8jNkiXF7ejLAf{y+9{Qm3M)}IqExwqLq^FErC zd8)=d{}^(6Bk^+EtgD0$`Qda2t{=W|e@UFE7tD4+C)39vao>HXiQ9=csh*h!oxJZJ z{T$Q}_Xh7I-XBxUJm_Tq=+Ij3FI;G}hj=r(n|aX5b%nRZIS#JH`-wN%h6f#opUc`U z))ihHe2{oA>h)MMKR}0g4QkKn$0K|Fl<;W6T^w^=U) zoy-qK#C+GXq*KJ}GR<7hLC4{il3&Mhzw*bcXNgxN+{}ZH&zDa*_fnh>mgW8P#5=Rw z%!7{o>$1jY=lP!}9WD_s{71!;@d_Q{Vf-M@ua6F2B~IZpX1k!1;fC@B@jBv#o5YJw zW6lrIu|M4w?t=#JT;wg{eQ?@rA9OOm28na`=D&A`cz3gy?SoG8e(TEN*XYE3;$5;S zhoO_<*Ln-DdmH=SBjR~$neBs4);j@WeB`Y&X<9uypR}{QnFpQZId9dbb=q&N&qTbJ zH<;rCI(c3v_9r2JC$!B)yd4+J?I-ADdKs{s>uvXhe8f8#VCF%WoIb}h^O!DcR#PtD z{~A<~c;UaL_7v2eCH{Zt^_b8hpIvOs>EveJBE;F$!)zCHes;?Lz>5^?7vE`6oOmOD zHuIokUa*Dqjd*y!a!KN)ZLZ=~wzr`R$oQJSd~YZAr`!LYzRG{YKEq@)+lSBOU*=uQ~Cm?K788(6K+=)+hHxJ77{P;@ur@=0V5h zr^~ATfc^XZRyXk)>-$1vxIw4F&1H=c?YdRpzeb!(AE^8nr31s&}2iuT+~r9+5Qd5YOC=w!a^HYAAZt&bk}Cf>aQ z=6Hlo-dFI}6h7a++3hgmb${Q?gU+e{;NQ28a=Er7S2*$P)_+1L&)rC@#_243LIm+* z2AlIcbR4g4tB1HRXz3CU@ybp!^PuB#KbKWzIop@ETnzC}jWCxV&>`Gj5bLU9_YWt| zukV}ff{xRP%NqTN>x243-Xva6y{|^*E9jJ6&)26F#>Elms@{KVSD$*P4)@om9YuYb zPn<)&vhD=pUe0FrFLb?nkL=|M^VC83)fMx#2@BsQ-rfRc9(26k$Zf@nedQIiPbJ=w zSIs==I6mCgoJcNzzt}yUcnjVz^PrRI81pZvA9mHAMZ9R6aZl(t9lNc#H@RP2eE(eH zRoh^;4?5{zoSzN*ZmwTIynIK@Jm{DgY`vY1^Vykhi-|XWzv9Vs2wiCn3?G%U@G9ls zNH-5e`8?`;0&!=bGTR9q+v&0{e#qx*)Lpckcp*;|Pv$%5U{_7C&-#;=9}{OmX7hY2 zbjd%3&^X0KQNHB5`6=?SsTsi1UhAx%Su|wF1*rWNUB(Pw-8{^t21msKy2=UJn(d_&>0_bYSwK!^Cp z`JDK@Pahm4PJf$mZ|G$CT|tbaZ*H7KykML2#Gzwe%Kb5AxgX=qdW3jyPgeevDF5c zKk0X|9&VXlpLnzL?y9LT{!{68Q26lh=*Wm+VNogj&%#Fhi>dbSdCm0EXZ-%>bQ^5p zehj3y+1bufIAz!KkTO1@^CrFG506oz-Ib+3hri>)3&dS*vkncqgg#tu-<{3lx1*R4C=J{SKY< zZ*y_JV+1QEuWaecfyqbR2Fj>&xqWK8e*hKk@R0neBoOc7<-_e3j`?A>xd;IS&&$ z8U8z3Icc1vVDDnYYnx#9Cv>V*OY^j!P;n#2=ZXd;iFYi~%!7`@&27~d`L%Yrmx$NE z<{nGvB(LNW4!}o#I5mzxtxa%>3W%%H@N>%W8$o_Id2gZZJm@43^PjM9W|mgOYpw4KkmWpd&!(HSmfKn}iQ5;yywjF=XXl&kgO2TU zSs#aRdb!fEJ@J0nrFgO&g-(TA$~iOQyp*r5cOs6{=3H{<5dI~_y*&pe1rw+01+)L4 zWB<9WoS$&{er8n<;vINm=0PWUSTBP3jC-{o@k*aG^PuDQSe&lotJlrTb_jHikNf%CtITy-UtK;N zSSjWFCzQX7zkiFk{q;PstgoPhe?7l2r_!LY#JM}u>|f}TPnYwrLsmSOb^OXNWLM&Q 
z6NoqXW5ttpK?l3S0v6IalVc9NO`JC$nC*g&!z;wvn{brus_^F&;+@jx?WT_Z`2TeM z0$r+Isef_PfAf!@aQ)@BCg)&#*N&J@`~x4D$2*{t^Gqv6`>XhmGl|!5ubBs(vi9%! zjHG4U4;b8ZF7Za&oVNv?Y<~s3!Rh+FGz*9~uBN&Ehfdn}R~okO@X|%Z+iJ6K4myng zA^qSSO5~3jK?%gmug~+B?M>+9xWbxU+&(OxZ8`D!*qrA9U81X;|GZ6QU$B)V#!+(a zTuHoQkt+OTxd5G~M=O6ioFc~Yuzv^s{;KF|;ui9l;{`ey&!wkwygX^)C0>(SW*&5$ zPh3_laS!5>i(e2gP>+Ae@&P);Z+o%MF(BO*;#}Kl&L7aRU2Y5OZV_(7mVZUOCQHmb z=s4V-zt=2nsa?cN&ZBtJpYmQa@L|`r4Eu;P@87!L(QPGt!Ra`8-?zle`JK63g^vB{ zwi=20qiMJAiC6N7nFk%$A8xC)7;l?U`$ysx@|k(iv3)LU!27&!XYHt?#M^&O@nk$e zhx~d-^iQsTaGW?FI#qni{0d!Cw_k3hp4Zq!v26U zhkx36hj^<;neBs)$Bl!ngNHa@6z_ANcx{#{o=nfsJucV8-!4r#T5&*FRCtU^HzC$% zqCMX3_Cw+p`BHIZI}5s?^q2kl1gW9sKdw`T zcsFd$4}(tnx7x?tPWU)odE)(5$7~;T%yU^m-I;g#%SyxxtFL%6oj`|l5+?SeI3t|I z`ORgv3p%#TZS5|>JWed3iLVh%Ux5N_qWaekQaQ)A+!>1nnLIvH-b2L$k@j}~}dCkhFI`rn64hzm@W|M9#7i!#cryJ$hxj}s z<_BuG=t!KG^O`x(AwFw<&gr(?o!5x-YGJegpyP5pWnO<0hu7v6-H4Z=vY7{+Odokg zK0W4Y)neVfTb9!G)Gm?0-+L?LK$#`{&^FS`&ev^1HUCcb_WIXI{#`)~a z&*O;KM3-|i-$RFZz`dymxBVk05T}lfKcQp0+*aX|oL>*snoPV&oy`7(PKH}rQT|?Z zzDvAMZR!W;Wchndv`3EpIfHm_w=&xYowP60fO=H^=HD}$c>P~B^PuDK3$a>>{`{29 z?-4Iod&QIe6zJlXg!}CYiM#6WugLXkWkq@5^t?~}%0Xs(p<{d97VZ^5`Z@gihr~;= zsUM+};n_;8GdY}V8S$1kGTR3o^IX=+Ke>IeV$TZV{h;lW`2;%TleZRgzx46gRm7>& z!)zCHzh-nzjSO&21+PUN-p5p-<67d)>SyLb*E=#Y!V&rRJqhSYVMw?fGw(4=vmB`t;Dyoy1EVVD>k3>~FVKN{ky`FS&*@M++ynB=iPI@3qgL)aa!5pEXy2;6Vq?Sm{tH_v5Izgr z{Ob1))}t#s@?k1;u|7c%pplRWH8Lbwip z`!4Y&-!so|Kqq+#V*mEz-~J?CS3NHw%PHuP?ht-R->`3f&L_l+x3Ld8nJ=(j5xhEU z(zShde6_O8xi!%7_^QkDX5xKx*)L@z-k18`VcE`r?z#MxxJ2y_@#v6sC>QBAtr zt|xY1C``PI9nJX+I+-sji~AQhtZ)!7OCB>1I%%IJ&R^=DzBKWo_L;*EIv$^LTRX-0 zUaq?@5pVTkI>i;hHFm8?b*paaU@lFpi`xiRtUz88vO?ssY@nR2{dC*B-3$YJ7 zKCC+NZY(kLpp*XnSgFt<`c|3=wy0{6X!5ynp&55d&Zi1&`JL$cINzc_Fx0z zRkk_j6goLyFn=@iW`EU$c&pQy>ow^3oFkXj;Uu5ybh$=z;!XQV`B#?9(4kzJE!M>^ zj&KpD@ek&3gO2M9w{=ULUw3U{TjFiBslTD)@hX?K_fJ({ARSi;YfrpNpPBs$9sF5Q ztnWT{zY}pztTOu(I+=duI+0p!yA$u*&&@pOWO~EBTJYzwMLmf(D3_TBoy-q6-r|0E z<@uq++u6pkM?r@(G;$dU@QN)Y=!pwt?d<)Rle80wH>il22VMN0BG0tzQqHXw<49|hCKK-un|+GV z$#7dF_Bs7|@Ll4KtE=pj;RYST?O}f&w>{P@o;Yn@GW!!c8E$1n{jmSuY~nS2+028E z>j#&0L!8UeH}XB=_3L2vCv@;ksp2GJw*LyoY+C{u(HuDtF#kJ%4OYF;reR1>m5wC;IJQ8&5-<0*tBK_tX zeSmmrw0$xjphG-TFTeXSB^cEp)s-GT1sH?g47h@i6h4>iKs${t8{{_($p= zw?k6qf5pCx3ptJvH?z&U1L$NwWQ;h+ZO+b<#4Az8oL-=l@v-qCmz%eToh9Dwn&x}~ z9pYn}SU)ze>UrXvY-+X(Iu&|`%9KNc30pT)9fTG&bwA zpp)(M9L@P2q^e)tAl@OH`2^_LzPPlBjrrV}l1aCSS4W3mO9YhOZv&n4izWX3KMw5% zXxGxN?{oD&S-VNvP1LSey9wHj*KVA4J=zV?u3Ni7+I4Ezq1^!OTH5uk*T1jbB<&_@ z*Q?zG?Z#_2PP-oMhG^HV-5~8cwd>GsfOakI`aaXYuiYf=CTiEK-30B%Yd2229_@x` z*R9C&Qb{*Oc(5|Ij z-#YdCN%L#@_toUo1KcYAx~zqHd0pxsXYNw^8&=9(4nT);puOlXz3O~GoG0nc*s%<1mZ(mccqw+Tn+ByZ|UuFu8}EI_<_K6AW4CwakQ zU2lA$BE;)=*UW=X^3Z=ldhYa9apIkO(VRb^<9K#kAGtVPU%gqHcn^x0dC)P>Wfe@| z`J&Ks<%qXV-crluhsyweb7n!Fi(T-_A_7Yh1Cg}+?qi1BlT zW8+OdiU0dpb2vixti7q{+F-s5ynz9Ih<9VCnFpQZ^$_bY$DQg&yi0Y=Jm_S)3A9?% zzWml>2NLhK`eq( z?9X5;y;wKcZNpgN6=nxoNN;^cl{=0Jz=O31_YZ_=&##3`fC2bBJUE<7eC$8MnN<3a^$>LMXO-7^-eUTO zkBQT0qz_4YpEeWB+YWjclq>A0I1zbK#a z0C6g%GsgpTJbvM}u#Xb{jG6R3@h(*~^PrRIIG<>DfAQxb;?3!7=0PWUXcxe~$G;yX z-rb329&|EZ(JlaQ*pySm%N1?rLHAF*{Ns~}H#^y!PV9J#u5dkZH~%H#HL*E28#=Zx z<=ptnoG(hfaE*AM>hm{bx`PhsZstrbhbQK~Nt}@v&FdeaW4oSjCk{Jwi+El2`UhE0 z*u`tI*q^ii%3b2UWOHr_bTZuqo#6a>`TIYK*JHEUztCkE9ND*5ROE0KelF{}Snsi? 
z#$Uu+xX!%Z13JV*oY;4LsgKp}*>S>O&zbFlj>j!RtaD9xomZ#!>4xJtk1PvKlu z=$s?G{&oF*KmEb)_KAqDU_BoxrpM`h@ z|1$d@IvLNeR_FY2enSr8jkd85I+?D~pGNtT`Hx)0D_6wqU+5&yosIKdp*qEhx3#aC z2OY2Lb6egj++Vr6z9jMHH8h7GbW7T%jO{*-q#r@4i@1^I7gn#QUT%Pt}PU2Pcn0e4~Jg3~_ zB<7u8D-uY&zZxo@ET^GExNYmj^+#lK9pW7O)f{fnv0W+q$i;nY6K^&oUj5r<9(0m- zMeHO0AnVJ-iwiaLpp)@1{t>rJ%gt^{yx@c8b<)sD`&P~1az9Juw#4hz#ms|Fwwnu- zVcy~=?TF`%F!P|}bpR>n!*}EIVC#1sh*#r=;>mOZ9nwi%ai7D=$DN5YZkB@050e zIL|W1HJo_owwrm-N#5fOJdXV+VHEMY*z{MSllNeK+Kb0Y0#=SDUV%T%_CY7@`(E4w z*eKUH;@v)L=0V5vCn@*1wr2ZwjF?Eg_PJC)NtQFvsr->L?sb;Szfo_$L!6iNe39fp zhkS`BV@%z9^+)mq#ebG`mD;(FgT`xiRp-{&G&tn+{l@sUA{^OgT)Eph(1ZjKM=WV$;T!~K^DA)ga(*EK^HbWyl>0^$J6xEcLw*5-r8}BcJbN>>$61fx+{>8aj zNGIds0*H5To4Fqio%HXD7df4rc#@lVfiIfvgHHN)so1aG=R|(u6)9)tLB~9ob)W)| zYg`*vn0O;gE1paz&>`GjE5iE_zWuovaSqot+XbEMw_NGR;|~>vl_cKa0cIX_az6@D zD8$DLVxA)Qb|CR81e{_tB};@z!n=0V5pzm#)bw{w4OXyYctt6om= zWV(Y6{=~jv#6!;T=EQm9t<;k{oZ!ixT%-EU zj$Z0>K!|dKvMATRR=syFP@lc}y+^-yI<$Y@*YD>jyFL1|FH}FrXYD!{spnQ!{a$mt z`ngZL?zYPA_?{~KoF#NTELJ+_2g>#7@A>riJRj=!;Oo!v`m;}ePQ+*JI+k1Ot~Wrr&Rr^e#z861N84}*SA93rCsMr z^%?cxkrG@leAp<4%8l#;%;gz$QPG(zr~ZMrX)L#ALe>u_Uf;LPJm}g^OpK}Kn)(WO zeG2or;T6liLA>oY=UG5krA2uE*r>1&@H(|;`&Qi@O}rZC&Gtbj&qux4k^5a8W5*G% zyUn^a=wyA+McnTcn{yKJx@J-1L$aL&9sD`<6!#nQ{P+%WzAtSaXMzsZ1b8^l1MQ{0 zi>DE9L?ts1I?21ejO+O|b!QUqfXzB&=p^sgk{quS)66AaxXr#B=y*M+%PJ}2!J0Im zcw^g`;{iH^8`c3L+-6r@M4U1<>))W`c9+{aA;w2?e(@pk!X}yH0Xi9O=-0u%3u!+h z-pMs)9(0_3-4@O>1@F$@Pl&g!yqO1`GBXc486UYnesW6UEX+!`F)L7a1e=6HpU?RtLSz$aaH5wB)%Gtazlz-_tna6C-Dw3m3}rkZ)s z$#|Hzn#+?9Jl_(pkWIdTPM(`_%*plam3H3~@6AJI`=FEM0M2Vde7tq#5b?%*Yvw^G z{d=)7m&fZS9wlDg%Vr*QlDGUTo=3RYYWx_eZ<*ovtJ%Mj!%~rI*8B9 zm{9mW@zU!2Ank%q*%fTfy36S{PmtCA*>Uv#zpC<5jzd5f(r=@GKVCv|KmU9);&aO^ zE>DJiosPKko~UsHX&-b5xAh;hecpF65ht_Fxs%XwxVbH?uR!`}a6c>YRz{e|Goa)2 z;kG73MclUt|A9r^`dU`CplN&`JLWi*rhwwa!nx zGpEcv=p?U*Sm)N~$3ny#qt{={`~{ulCGO&N3Yj8{5pPlxbA1h+9Cx+E`jK5FN)oSi zS2GVf8NZL`a6R^5O&Q{S-_Oj0j@!j<>wZx#Ps)Eun0e4S|9a_}dwkv22r)k2 zB5xJqSJsnE`{wivolGx3iu+G19d1CpruoeNg--f+)kAK7zMVUWc-K0bdCLtX<<+4v z&4|~{X1*CZ8NU|_arhm&-hz0cHuKHU$?$6{_IJ6vv?AWsH0E{>bh2GmZxq|N=ulhY zCEK)A zOuVpp=5T|Kc`mD)SpS&gZV%%9__pFne?o_Pd+%SoPauE&{=})+$jpHb;f480q}xu{ zBZ#xkrk;TA*>s!63V!Y!h0~4SAYP@pD&M1>Vt0;0D}8<({Cm4Lw~I3toDu;2Ae zR-b*B)cz~q$NKYi{rNI}=d}O(&U5OsBT0?l`p)X{+|`Qbyt+eiZ)*Kj^?m0J{rQ^u zoIFKy7pUKJ>(Ac#>a$b3ac=#&pqf94tG>n04{EgB@5VP)uIEejJW0RzZd0GVCDnMk zbG;s?eplNwS$+1d(Bt%&C+V(!$9wuCKR>RA`s^&C=WnzfL9eUN?liUi^}gq4&F%Gt zUzesr>Kli?%>~d?pucZ2ZKzsGQfLQg}S5|S| zr@!&@6W&nIpU#(*SD&3#wOc{0zxR|<;T3RUiC^dKp*}mSEB|_GXt%uL$9K^4Dt*-V zyn5asPNy^PRW;w~(eYtzSnB6`bULvD6)!2fibqFr_1Skxg;&ygWrwr2dLEEPeJ|u! z^*pGSdhV^O-R|0szRFF!s{PSW`(v<Mh)Jbw;d(4+p(^xxdp zEYZ%-yKyG*-tT6f?}iS=6UxWoC%8XYq0e06l?*YLv(Ukx(=Kp1d+D3`#F?YZYuW#R zPPSjLj}i4$)(VS=_hf2o~vEJM|x|FzC*O={uPM%+n@n`tEZoL)6>ua^ng7CdMx@!WA{9(2;b*cT7`?ymfbc$1ErdC+ls zaarlbzQC3_zb0N8-9D7B!NoF2&Y+uSfC;NCjB16qX#G5%-@npC`hj7Ds8rU_m)KTJOx7nu+ z9ozN%`3TD%A17XZn{f?Odq1s}vY$(wSN!W6r-|3UgV~?Z!Jm$zyxwq0_hjP43^#{A zbp7gu^obl2{q%eQ>_b7gU0ra2cqeT3Q9~!!FJAhCVS` zER}yH&Ptnn0G*7_i0aHcmd8iDJFU&(2AwQFrVi$M@lmTg#Jg`Z&IcXmE0^`{9Bv0> zJ#vqDzZNv759knX+q1JjcP)NMoJ#f0{)CS0a$D)e`0Y1opAhe1Lo*LL$wN7X^tPu@ z+72{6A7$o2C*u|8kb#$Y`$giN`^3zHPVz?l%!3joWD-LP?UI)Hv5jClj&}1LFP61z(Kq}x0~Y?I>{>~ z&RN@4vo!IV+3d4|PV%~odzA|mD@VLTznkrYF11)s*_rorT@%VJq`Q?{DiUwMJ{M8O zFLYeqrRt?OylNadbZDzA4R! 
[base85-encoded binary patch payload omitted; not human-readable]
z!#G-48KQ0A%?_ba7lo}?u%7|`wnOiTZ+9OEI`|v&TyO%LeIkz2W`74d_`Asro;P?G z`bL~%DjzNV4c+kVzq=~jUcM8&10uh)|3kQ5Kj%wZ-qZUVI`;RE_SlH`J-P))BHk&R zb9m6T9+V+R+rK7<&68Z>ed1@sqY;Hc7^$OipUoc)|IW9v7|E3pl|7H5P#3}wk&w&p9NGIN>s`NSmaW;R{`vW@8_gK%1eE&NM@lLCI zX#cMNIgR;Zp0s#R3gU17tS>Kg$}M5(_jAYN0O{XXd6U*y~2-z?pV z5GT-PJ`J78vG$MXzP`knY;z9Nv+>DcV4o7@ToR!a@vAHU$o32!+F_RiJU?0(T820{ zhbP-1bZCdM&y9Z3>)RT&PV_fciTK)Ye0kz_Q0MYwe1Hz|)ukNmtlRmi*Ty+v@fGWS z)W1cY%EZsB#ur(C=-_YUA;8I-wi20%qf)4Ek`2v*Z@rDk>3084N zwioDr)n1S{2me~XF2s*wbG`sN^gmza*g}85UNXJS->QE*44eZ2|48Q^#6N7)Z=i!c zaZV4My}kj&NoupMh0e~N^P==8PVN76?#6nbeOltd#Gj}9F8$@%eRh|zp&R$VDYu3Z zf9G!fd;mJc;c9DK6I#T+92SRhPYm|`ebFf5*4U?y8_=O0G*s=N)AvmqzMmQ%7Kd?O z1pLlT#}PlJ%{eUS&<-%q0_SArNyG`VIkx~E+5yH-aMC23Mx0e@UM2km-LLu;{0V;U zTQiC8P~Ux%eCXWs9R}W41Ao|(xx^o@&hg7}4LaBp^AT_|v{^`;jy8D!=)l1`5u7fS zmJlbBvZpK$bl@P4g0nX9a^f_%$!|ah4&o>{d9DNzr?*WzfzISu-^*#gU@dXl*|ZZ+ z-^;-}O(aC22KRCziP;t-S7%1kqe^)zB{NFz5-?xDdakZ*)^wHhRzrU~0 zBW!#{|AC(t-8n`4iSP9913?FWlRRczPvTr<#tmxhmD_@$DsU|?mi)Y0bjixpo5<=4uTU@_XTmHbJ0^ANeMAdFI#OlwUKHI9xS+`hUK=gS-OxZOe!2{d4}a`dGbRp@aS8 zeofj>5sBm6s^>rl`*mBw-#7Ig6_q#}LrsqKD|D)wVfi$?dxH9H_Kr!s2{!MbL1&iB zWemH@`G>1JyoeJJ%{)IK^AFG=K6Cl}jlv>6-)It_xbf4QTp1UjLwv?Pm0T;6J!{wF zAOE7?p#Ian5)r!Nn5CdaxDzCE=MaSGKlIseWN);wk7 z3k8WkzLlQu>3RwI1lXbZrSq{Ea5! zJI7~_G$#Hxl78aS_q@?Rd+ocJ+&=-VN5zlyIbd$cBAsR1TW`d!9X{Ec!| zX!R#?3fX)Y8#?&CxPKv<4<@P7i8vjanB|dvhc5FVb3Taq1NyJO*tahEsw;8RtM7El z{tKOHH=I*&8E?cr+%Y?P67P=kvn(fcC||i)L4TF4;xIJx~ui=20D{t9S56c z98R2->K!XtU(e68TWn zFZTGk#A~6xmnHoPo%{IbFpebTe0ra<3yI%0gTCI-!G2hep_~sgE+I~$ym}6F;6&NP z=WC*eTTYzH+4LOf;0K(KL3xIM2_jDEjCwyn$MMNwU_Az2ri*KdHzs|KAM$MF6Yc};IuQznCAJ(tnTseM{I63#~InaTFc_lb6ww@(UFPryh zp#ul&E^x9oxk#J^Hscg@@F(($;EW1?l{iaPy<}X2&V5|7p3B*=ax>^9|_W7vv>i&n{8p5$Cqe_nV+IIZk7sSP$je zosc-k%1c{doJQFbm)2dd56^> zk1FLO-tw|0PsRu7k4nlP*&+mcE$_22%paInz>f7w7bN~O)h=aRg%18e9vGYz*@_Y; zbxgfKpffqv_3klOapF8t>quE&&(^!vc|_doe#GB-=D*A%T*kW!y#8wCZ7POd_n`8V_&=_lmDQJxt! 
zsuSnk6}=syb3bnVI46B!H}%WP5o#0vz+ICs<1ci)@9s2`$6&vl3av}Lm>={!=#*Q+ z_5tw@De9l3R|DcTRNwWM{(#QTAJrU%FD>iCPmcFo`~n>F+JMjDh03^@7-c60gQ5)BdtwK!@?_Orra*_cxlY z;+NAHT!r%sSC4ii?(BPdzd|Sdiv1DTvC#0Y#GCot*Je|Ws(Yr+ z?T<IN}^o z>lE1!q2s)Rbzg17lu5)pRzTkm?e^6Uiu~Yp|7palZS%cm=wL6jci1a9^(^9KD5keN zbl@Ow4$kgtbBVL5jGhCX$?-QjN1I9aW{(6dB+l^AD%dLA;X^Y0Y}#7FBs7*{-4EIBWg>&Vyl_iFZZqUrL_c z_jhpq4|e@rZ9DNE+w9XphxUm15;!xH?;_5H!TPujogHUoiao^HHQW=9(`YB|55IoA zpEwmenjC2-=y+VW&VOeWI!wGjZ1&GQi}y|=r-*-BBOE7w#UlFhLdWsX`Yz1JIj4x% z?RS$W`-^9JMeDiq^Lft^zoI&4BI7=vJGYK=Tf$u;&i!8ca>{Y8oa!ik@w?9KxDoRl zhk<+QXqUSWTqXXO-X>p`6FRiZ8DhK)+J2KbRc!hRbSB5zj+zGCCC;<<`ucj-j;!xN zHz@p&`0>>~zO1i&4;uGGV7ERWpAct_s;`WP&>0#?V+~Y#IS5JLIe4j~re?td&Kz{$L=@+f$1jdgoMq~~hwNX_nH-lfLCiO*@{q_I!yj1!QxSiLjUAzb9dU0HoazPA z5@)FTUb?g+bSB3-&J{kIfjBe&(%09san5Pv**=2i$64BBA%1U_Ka%|dIzC5k9XFH5 z%|X0mDnBFZ&Eux^JEUE`a}%fP7k$6rJg&o-BG%gvZsa4*N}KQVL1+5m$NAhqJ}>@h zOhMw@v&lO_r^0zyo+z#8XC;ahCEg>IcanC34*l%&KAzW)e&S1<6e_dRmqxrpk?Lq&;{N_wZwKgPJ4alEe_ju7K)h)- z=dGZFpV1$|NmHgVaf0IO{R|yA!Q%axa^;#6CqXJb2Rd+&XGM8p#%fKR+Y$8~=uD1v zzFg=)JL2?J{XotaJ)JM(JzKPTP`3$4|Ycpa1KdC)P>VPL(EdX_EPhj`Cz+8K24E8+q;i!u%%j>~4;feswR zDR7P_A3~h+Hu+-cz`=PMaQsdWC(d`ZpCbJS9XJPz@qI~YU2DoMc+=LGdWJ9{()mOuT7e9J#hxF(T`)$ znehetUeZu~=f|3C*n#r-PL|Dj4s_t4 z9fGs?LMU+#E5H1zKVjVf&X!?EiF4$tzC6&O-DMVe&U2kl66d0d`_hll$$2Ekd6X-; z<1F!B-!#kh@8$>o#??^nCy_o}Aij^<_m_O=+~;qcMjSDp^a{K}{H`y}@=JR_$L-o- zAb!Cftp?v9UXaba3p%tLyaNl)>H>F&W7zD|LkE8^74OdXe{i2TN7VXD)(<*fE7h7trB6LtW*-=ijnVkDWRwEZ>at1hCt^2q}plR9s(9=ulrDvHo@J^(M{^b#74h zSLjTR)4)A>lxN}4^u!sh_SI$FgHDcfI8P4Vn(~>6SJq|LPudSU_x;vsWE10%Pu%Rp zkJ8ZO%kn}e`xVxWDDTA!If*y8soo#Z!5_JWKVmG%OPq?W^c?8W{%)jtMd!Rq4k7pY z@yMq}W#Z3l{$KbGqqO*5>eH0fh~K)6-VV^gp0~yOzRMogBu>WmdJc5pTov~>UtFz2 zoY0PX4s>>YDZjfuak8uSEaT0;^~=c-jflTbwZnhMcNlM9@;o8>@|MJ3Qb%tG=uA5} zjU{4V^m>!l#2M3EKR<_#w!)`@t@TAMr> zcc4SN!S`0s4>x4$%bbFG4s_t)y#;Vm)f-5hR5t5P=wNqaD1WD=T=JpB$uM1Scj)HN z`uYB_!@#@-?`D4TDHaVDmQHR zZQB-}K-}#Y%<{?pAor8-eb-)-F2!H|Vry7l0{K_ezucS2#LuSg!^`?Z2mfNe1kRU* z(}|PilHQKc!M`tRW~KKS3RRp30g$_b2j(up@aQ!UkdiS+Gi1RdYwv5vQ|Q?4i8C!71Go{bw$?F=|)t;qap@Tng?;hoOd;M?X>2Xtor zkeB`OPPfSOjt$yJyf-%Y;h|%m%NQiyU+XbElz8jZewoahLWlUrez4xx>RbFMaWkoV zgR=k1`&#}A9P>rEaF?P_30ohc|G=($-<%+RGn@Nb(814$hu|bWeug*)e$&q@pffqv zesW~;1>!7Kc??-!&-#g#AJ=(>_yIZf@z7H~#vRzfh;@Vbk&5W~&|#f|?{i)#KC#K> zSO4VE=ZW){Q5W1H{+7~uJ3|M5brt7Z{+{%JIMHnO8KDEGs@Qi4Z2yEfqipiu(82F` z4;Xe!)9eLtYTMlZg6>!2R1+~DoLlq_@o%g1N-_>Xhx$5&{Wp1iAWoJ2`nU)kIBkXf z2j2Kh9PfR44s_t;65sbqx%NA8F4?^Q03A5^t|IK;c}0YNKj#lm+i;*WIWFU#IA5~A zPZZ*0Rr_Z$4nl`G_@!FhvvIq<3!A5*-JyJg%EchA-)g_C z*@x#fk@_Sf{!X=jCd&=o*nV|~om(8d>&D!l=b28UxX6<|85cb6f5Uu@r=YdINB4Ab${SR#H_@rH{rkd<;QpM#QQQ$?&Tobl}Vo zoUwU}5$D7#eZ8PFIo9*B3qKVnPDPvZ_MV-OwazoVclr?@_Lu$G(|HE+n()iQp$_6F zn5u7&&|#jDLv_5GsV2qniaIMS-eP|Y{6iHh5IkZ^MfewC$eNmoQi)s_6j5?R|@BHF4#)>>v?&*tZ?tOfvb@m2&p5YUKF7r1_=$qT{EYDt_BcGHCGl4c_2gY_hk^GSz-i*! 
zmN?Dw={eBB&v*|FoHpy)6DM6+JqJ2)=8N-S-xhZ!PPguQ4s>WQ*ylibS{Le0oMkOd zjc-a$n-y9HOr;bl}MLP$}s^;uKk~=RgMz z#$lAF@6sW}>0tAn66j!m%xl0|+i3)GKG=MR9J*iGf2?RHu6SdJANWpRU+BzoTKj#q z1>=cx*X#eM-&^0cNm6Vw@fPOM$2)n>3HP^Pzd{uj5vOHkJqJ4Y6YT_??RggyXY&pH z_XnT@XNs7A$7!{UIQQ0=FP6x47Gx5fgTK(e!Kra*A8}G;(Q}|PIoA1n`H)cJ z9Eq*3uV-<^+8z$AK1%$cPx?HNXYIjhR1oW}8#_-D|La4OFXtK1@jSy}q!W3K3ro%t zZ%C*`35})Iy*m?YI>76$yb>i*)Kiw zv(v~W{E(>KUE-JMWAddRpp$;UI12kcIPi*iQ~mYzh7Nwf{283c#(UzFwE3(m-l=7Y&dnh zXXh94&phPeeCV!fsjim$V;<{}`hKXKU!X(1@h$}H*DGZ>;v`F}pU+T~h)4Vq>RmM1E}_L4($>9AK(<&otZe+3cFmgy+DWfBd6+kiQ*0`6)DAn zVE-a-4*tvf*@)lph~6*I+2wRugT#L3M4w#5N$^0=fe!UW9tq{S{3$49Im zVb{5N%MzzUZ@qt@gP$=Ef^#u-MdBpSs^>rlKQ~Fv=PUCZu1uVrDQ%AQ19Tzz8SMe( zobg9B;=jqO=R@b2@APxdZ+^s2uHIwt*3VcEqP(NF)FGaw-V2iB1$6Ka&ar~?yqrIA zo*VoOoy~DuI3EB``f!bjlUJR8lJ$d*;~OV0aCS4|bzf%t!#keK3cHriRXr}+ig-&6 z=fmN?<==-UZ&7+>+7lQZ#F<;}MI-|t7EAA-N*N=M?){%G^1U!j9v z(GS6KZ|_Q+8V~gx=-}7U;`>o;ruHDt4t3r}`T;tg=bYa$+T-d&yrqV7Xx@Ft$YsSA z`!&A55H4YF?3kfDW7_B2Tli(=g&(`>HPwbl|KN@%UYz zQN)R&&acSwKxcED>-B8O#uBHwdWTz%qu#CKocm>WFO4VuqiFhdm)tKyya4;>T|J37 z9TVy~(7|rF9|+E?=F^CC*Kq#^I-B!-eJaj5y{tcrIHlEjIN1-N3u%0-Q*j>gb{pP> zHI0{FzHyv;DE1=a^)ti~(3R_GpFc!9!S~i-w^#?45-(N++y1hhK!ea`*Al1xJAMCx4)sO*MtL?B-AJ5!>KzkVU+6-b z|38X->6CA_5WlVAoE&s=9F^aT-TG_?@lL4s{bhNfgMWTk0c3@>{d))HKM=g1g*XBB zn6Y9v@gx1B?|;z2KR9;{&h7m z#9yl3tCjYI&a?lW-#a|{_eJ8rKd;{(k>5KUDdv+g`L7b^;{iPfI`{?od-!Gei5tWz zbWP8J4jkMI1ZQBSJH+|tlb!<|{EK`oIHz;HBu)aACzAGu&a?fU^WLYsZ;0=Iz}|O} zeDB(ibKV;?^8@j3pVRN_$axRr8|)Av+-Ks%G3*aP2fyH4DL8oxd?U`iu<0Oya!EsGpypgC7D!zpuD6HgTrJ)pMY;Id1Fu ze$FSZcEu-7-KTmxLC0~7vtRwbEfMj~|6-Txzxmbq-pzwN$%sFrgr4u+dpFK=A$e}5 zBz_vhySwsS2H7^O7i1>R1H=0v(4ieoSjOL991=SRao!u=QF8eQZQLN%=6K6i4@|6D8265h}e5(S)OO!z0ZlFVaR9?kL+g~MIJvGv{ z;P@!L*ykHiurTpseAbVP(7_)KMZfNKy%=$J80JstY>soEG*Q1&#Mu^0UtjO$PnU&t zB--D{G-ZjuGn2mkL5FrTP2@WhovT2c{Y7n#^fPqI9l`H7Vm}(?dj7C7@!D3>mkT=h z0qZ?*HcqcboQqlX9O%G7Tm#OpWor^AmUfo+w#y^ORqy=iJhz$X zjz95RHPG|D%Xj))SO>xmrKdM0{@QAKK6L1Rxs}7yMF=(VL&=H3{SWa4_&qx{Cw^&F zeradu;4i#?19PC8==?(Se zgw8IfGY{Qtz#qiv*T&|^{%6{6kmnTE7w$>CLf!S{f)0Mb_xIq3S(*D1=eL1+4s_rk z{{zn87Xyg1ytAGI9sG%Nhu~~pF@!kzdf6P=|Df~iPv<$L%k72}KhI!2-`jIY6~+7) zr}t>$Cm5mULx=vCQuV(XBRd6Ie{N9otII;Z8TMIUcpUM4+Uo5L9sGrH7@Q%W1BjE= z@NOM+;A9ZznwM{$Oq^e z4%XYS&s?AR#0eOr=RgMz-i-&R#)Cz~*)vklfexHl;=9qyjxHlkr}26Ybl|{VD9^sJ ztB4a@-5ZeOH*_|~x&F9bbscd&se2W&pL#d$x~-DpTvFu(n~48f-4BrCC3G@gfnQMX z(bhKN z8Tub|;NUzqI0XkBAx>h$dI>skj*9sFQHc}8X>Hgag$^8yS18Z&P-loUTzx-8+8H{V z~@LsWPVnV_zv9CX4i@LMU_vU zlYtKBWG-L%P-tu9uPRP+zGMD-y4%D}X_$vh`=xg}v>udm$oAkk5ci|tpFGK462JA| z`tb-l_<4ocN9gh34RN9+*RSWGvpLRnSmDkeh!Z)r&5`4gw{bi28mRBr+nN4^sLPl=-u|C5hC?+P9KkMR|ps1;)pr;=OGfe!r~-?;?m^{`mP=`q*lNc%y@ z-^X;W)0)3dOuTC^ZJumT-mcS@c0Ne&^v)lbocL!RM*W}VB_BG^?a5{3jKpzJ^suRk zfB&=1m*XdNu{L_iQ}^ARlQ`E>>H9Bqww*99INM+9ba{z)v%Y@3 z@wWY8oQM7T&nrOurl=OkBbrK;axokI-BFP=i6JQi1TfezP{es(`8wKe9q}y=d#3)vru17==s?QY!+jtB6b3hY>TVrAkEe59{Gbntg|$r!nzXxa{)E`3t&`#@RxLnh`Heb-f+sIE#21 z{+PD46>%;$)^nh<{o%H5iF+7*yR;)ty{h{9K^M~aI5u}D;)VCOdH>D7&h>4Vs(%pw zYcoCHyY;Q}`-pX8_ay$2;(EUPK4L!Q_$6~%y^j)VMR0s}Ta3StU-u#Ym-04Wj=#{M zKO)Y8|HGUeK%C90T}lpg=#PlA!D%pj2yvpS{RinU=sd?e{ua&~gWsy)aN_q-{ZaCv z^UQbN*Esuf6!FWde6{SKxUV7mC+0bn_tcWH#B11E?;q&kADk}%XJFF+;{4T3&w&mc ze5U}M?(WINsc*;wLkAA>3gA?&GMzX_y6VdVoy~E+vwZc}*~FP@u(#=*W%L`A>%!A{ z#H(QF&(LB1OQ9UOp=auyua6!Jj(1=O@aryIMEu!?eFErUKb$`Rr%S>4Y@pIj5Xe!(~ley7`yh~HGjGm;M-_*E2tYm+@gF0?ru+<$QX1pN6?o)iC* zA%1`k{CbLC>w1^nWhVWl%I~r;o`9cg$V=kqOrvi{(82%mo@b;vZ;2CM?eEL^0Xm!G z{I1hvzmLRuoy{(fw5NCDw{yH0neGemHx|+Jy&Er_=RQAOwg&!~-;AQ-NofZ@_vy9@ 
zh<&CNOTrRoygKh8`vG*y-NEN#qKJI9&yh$Y(8>BW7vCQVojf-24yp6P((cgN~&@c=B~+%N1O|RPkLF-Uo(Xs`N=l{O2mK zAnOku{EK)EoP*g?5ht!;T?L)Z@wf0D5O9j#OGBK*HEcV|ehi)GILvMRJt8ih@9nlP z1M#yP-pPlK^XhJEmDuN~-~Tt_wJ^l>(8>0Q^9ZPak^EC=Z;(C;; z`ESLEcf^qYgwAt2ajw5-29_fJ#-sZDqIc_We=DgtS95e=S>k_F=ci>mhYs^|5!HaA zmdKQQPx7w8{TgvP?6W9EMdEKZoI8gO{zrQNr($4b;#`iVZ;#N~9OwLer%5&9>@(cU z@NRx~Ta(27(v!c`Bz}rQ`t}7K=P#Y-_NVWuLp&eDyeH4?XBP8k@$UY_NoqJ>2p#-@ z@fLn~U8^y1taf@oKxcED>$Mqin-izM8vo=xCD&`NtfSA5h-qH0)ji#cxWzlz<&*8% z+w~gyE$ZKNY&+t|GMuA>&U1fvTe(Es6vnp`@#CxYri?3~ljAt@ASmynVt){CsNoz7 zbnrXgV+ZH1Pfy~^G58%i^gq}coJ;rn5U02D`+xI~%i56R4ZRPQ@z4O`4{WP%pU~Om zbX(K^;eCU4Lx&J&RTF(bhEDnc@dfJrwCHf+#WC0qI`{$eD>y5^jv`Js!}%TPY>u=4 zjom($I2#S;W1Ri3abVE9yTNfH))^?@q2~d_y`l0;(!bDo?jO#4%-GA5i62LeW0H?{ z7ZQI?J&q6J1WYIXnkxG713Juy8C3&V)L?SCp&yU#g#Yl)1nje<^K9ZLt*>vN(1EjA zymwqP>wMzORq?5ehoG}L&h=y$-^IkKsPe1Qp5FP(Wg-8J`d*2$ocQ(CdPJ5JI@I^1 z`2O~(9jl0w*AUl32Tp+4znMB|9dR}p_V1tr2j{C%uc(za5htdv-jC3MgMJCl-(j{9 zXPBWqKxcFOt$E_RyKf)vBu*?f?nysF=eb`v-%lH|a1Zg%G}70X-%oSiqe$HF0CB>q zJiIKYyho8s^{+$M=3JZH=I`MC6p)vu5He;@l~x@2Ak&PHf z?u4#e4qN|Q()s^8j~5g;_rkZnxF6|kx31;?5P$6hJ>T1Qi+m*PFvjmI@iXkR`O==y z!7rG1!MX4v)Sw^pp}8;UInaUQC))qXVd03g@r|AXoy~E+yE!96B;tIEY5P~M4@~cF zVqJ^+eLEGEc>B}aJUMz zbcoBopD6p^e_O9b`%p4v*Wh_$t~g&mH9>shS2nDVp@W}s&I9Evb|w*VrWp1SpaW-; zxF0utSTf=?F_Z^7aEgfUFx4)Wia4(f>mlfDj@vpdz9ZMeN=uw_J@xGlI%W3Yd_LkJ z*lEw648*&t)*rHeLWla57X2#Rq2GwJaDcvjL1%NE>!Fp;vlFL)%6Ep8kBECa!TU6) zupV+*9fcp$-OWwhRYmpXgU&9W^W4G03KY5OE;Zc5X_*9*E~gx)p}Uzs+Yj#v_*u8b_p^L+RwiC#mEV``6FThc ztXXpXWuhIAm0#S}6>$!IRh(+Xji$~^$#Oy``*lOnA6K68Bi;_fIsrQPd8FXX9#Mxl zF2lG59sG=V0e)^++MhU|)O|47uA%chzxiAD#QXbM!Z#-V5yN>M=sffNtj?7<4>jaS zGva?#`4s7Q=n^OgE&S#6^-Vtie#ZxKEbLNodn@AJQ}>wU_zNBUg!do7$vU7Nak{E` zL2{r22XQPot8aHEPP6{{ehr4BWjJ309XQA* zfHSf0XyW{7SdT&nyPtbhtR=(*bf6|Xu|2lNqJh&fexI4;=DrFU9*T2_oqEC12UC2RG9dbqt-<3U z<~{Jo^xRGS)rD=oYzNSx9Uu+`=TpJ`#7R?D&w&mcqQ;^(D_W;$*00bN<`B;pG34;tuhvyMBiM{ocMf|2-tf6XI9f zrJt9e<8w#O^S7m@JtyAcIr_X7pTGV7-6C;*DQc!y#A$e0AGbj#+YRPDw42rk-V!gh z;XO0x&~C5~4$iU~ABnRzo_?N#4ji0A0;fu@FT~lBPtSo4?N5%Qe>}7X|Co;~o5SYF z_6MEkxW(E2irx)N{DJCxq~v=S7rCsM;yv?Yy(1F8#cz5$KnHuC6nOxjs!@rvqO6_+ z9XL3b0>4~(5Q8{-OY1q%frI!6oNMi45hqh~JqJ4Y7kO21Vuy-HoHT#g9BF^(Jp0$V zAN%{1gv58P(U(*1$L1A&d|4nVar&OrbD)FWaIO~htr|Zial#$abD#qU^CLKkclZ$J z<8D0%I@llm1f1|=(-SB9Wt$`I51nWGJL9Yg`7;y$`o*8&JMSMPyPK8xLC7BMAbHT)^A65~`dN*R@;cx5SaIT=SKlL$>k#M=pZ5;Tc)sSq8!G%zHfA`bSsHzO1ReSz@-g7N z{HG#up5)MTpaW-?ILB3ClP__S7148`182_PSLuD&`Q54$XRIOKhR){rS=~e)F8lAb zh*M9+kJ7Kul~Q)k@b&UvaozKR?T&iGUNd{tBW@@^eLbN=J#pV3oG)D)5@%2vog(qe941O@)929ed%g5+Ys*; z^=`i8LHD7^l%@Z^)9bSEJzLl=G@9YvgNul4gObUZ(}EWG!J z`n}9LmUz|X=;uM`>~cBd^t(^S6DQVUnFaG;$CnlU=rV8` z@sgg^*Bd(c72_4`wx#X7iRx@E*!cwApD=(zv8EMF0)x9hu_c+n>6%LN_mh4>J5f1Y+dae7D8 zmkT;B*Y`M1>^lzMyNP&FljwQSG0)#Z9uMV9KXe=M^5(U9vOhqF_^7G^CyjLNdaYpt z3!~j(y$Ak{D1pR3X*kCM9XQzM1Sk5!J;bSLSSLaU4&p9wZdE!!oa2V`6VTZl=lqcR z!C~S&nyj~*x9c6b&%S-YapH&Htmi}Lc|GN}MvMMZAjKKtUp=R9N6^W3G)criV-8&) z-mivtuc3>%-ahAsIKF7*9x)!@I367DA)gPweYtRj_)XOLEjfNc2mj-H3gFaza)UUV z4EL&_vpLT7)XYV9h_m^uKJTVqPdVGuxiSxkm+89All{=U_T;w0iE%r8=_kZr`%Yh8 z=y=?Ame()nIq?E-{Y-hC`?EtvzasvYw|c(ZpT+k$l5IcCwZTrae33S~5TvlaqKCyJ& z_{3Y(SZ{CW>~ej7m$y3m?Ln=?#7V9ECfgl!XS>*XjN6x+uj@Of=bhkj8}Bj0e&h2e zC;nj{eL0~EE~m)jyttK$IF*vx97j2o4sl9DHF51fRpr{D3IFDC(63R>-ObYyf3n&i zkbZ^^?FHwT!O2@TBXPRc*4Gz0a00|Ru2MIDBTf=SyMxZ=xUJXA)6#wUqFu5RCwoV` zJhI(EC+B&rgHXRH(Q*^7+Bm%*pbKf7iY30k_waH);#;freCRy$zxP8CKldE*JMn9% zJifFCbZigjJ1WsL7bRZJJ$ir3{cXg%uyC5Ut4s-6QK`U&Py8^K^nCB;Dd+j0Vg=m9pL56NOaGY8|6n|aJ=!FxO1$N-^mc^~{So6i 
zII&OFV9p~w2RiuYz(M}*TGw&4iIe`L-hR-92jzOZllL_)r0ht%A<6XRg3jjnS!cWQJYD{37vi-3&F08F zI&_$yE3{1*KFNYN!RvFZYhkx0d%F|&YkqzCphG<|4uNxJb#LPAQ00^JCv>ns)<@tR z-`$@$SCzeF|A)?QU)Wc2TggR#+rD@(@m_z^k0;PE&(GQ?zK_s%_%P!2eq-~bAE1LD zddBEKFIM}x$`5Yqq?nf$HyuUX%<5dAtT%MBU*UcY?0CM;SmISTj91XXugDLBQ#f}3 zaeAKDk9*MB9B1A&RFuiYxqnJO?s+#Zxvac_=_t8F58`*&1?L_ti)A>{Uf7Gwepqs=i zpZx#Ab6M5J{>iJ~?h>zETzxx+4)#L-K)L)cJRr`O^m-0-;9wsB9G`(th_hGa*JOKw z4x9nvd_|dDFNkxYvc5df!GE&(fm1qm=%GL6HH&r8`vW>~QmlDK?;kZd9gaBhdh0pR*&OG+wv78D5hwNl znxXyG}07?>CE)jyRu(>N(JXgYTK3Ja12BBu@4*dJc5ppud05ci1X%)9nZ+| z4m!_%a9P+N0>6Bh^29%RRUa=whx+3C+~CBI;U-Q@^}QikU+BQWIVW&JEvrJD`r-6` zgbo~huNa)Hm1+OlMn{p|Y6aR@s9*C~H^55f6e$4n)=5O1{l-mc{7 zzw78c2f8U;cjBc{@s4cA@*F6>j|Mx&c+`tH{krM>3Z3l_KdXYc-?C&uKjK_b_LA`` zbjZ(TJ{`~_#qnDzKj&}ZyO}88y7PmGJAatIe?aHi&(8cQ;5{P~7=C!j;Wgg?PKc=T`L3{i2E zY){bH9B04zd*ejn998GMWINaQ8<&N5eNe89ZKe|M%pbP>WVxV2{V<+^6Cu=0;%riJ zsN_Hg4)Ran+?p_lI13l(%LAQ>Gd1@D;w;_o4ad(Ke45XfCw;q^IL*~LPTB9EL%-`V zG~mp^nJ0t$-A(aMNSjm3iF@v`T|PO!Lg%^PxvaQi-PdZuYT_TbtFJ$FXm4vHbNv78 z#Cqb~QstEG4LTVwB5w=-CcU+pcW}Y?_*( zhufMa_B+EwI!62&hWQdY=~wKV!Hy|foFd-mv3h?%2Y-AL=R==WXg z+NQTZbT-G|IwJZ(!)Pyvb91-Nk^KNV&;7vpJ%tPBUlBj`VLjiw?I1QXEH$wgSW1iAi zaio8s^XwnzJHwragd_eK!+tNn@8et_WXKVTI8hAuL`~}hj9aMph6vG!HzAgH=L=ek z{+3}!OyYDiyaNLr{E2uB<(b>^tj;^g@dP?~p7W@9 zr@QLoM8unFIEMip>W6dcs9%DhWW;G)Qg3hQz(Jk?oSH{c5vNNAJqJ2)Mu%QTdG&a@B)I76sFyPV(8(~5JdJuYV_ZhLhvQucG`a89+h8qnVS5ou5A=R4Ir=CaV9VTT+O za})oe;l3nv@Jniu7v9z(KXLA>`;v0Lh7R>bejMdVRJjmwW|Xr1BI^sC=W)$#xu^3z zpPBmSjAdcQ;0Z(F(_&SSZ)$dx$`NLi{B@vf`#%60^uoM-xp_}V95S>h#(r*GfT z!5<^V_!jh{0&#kV({rGMKQLdwA6Ms8CQf}d-b#N!=h+|5JVmpb)rj9m-IxBa{A%^t zKh=M$N&K1F^z8{c^q;uOQQe+>6_p@ZLXeixjH)tVFMh~XVC=sf%V`+emwyuRHZt2Oac z82TY}vfp4`i1PM4+m3ii)q72H{DBVk!#Oi>?o8-JoK#313y; z?w||Fev9^;p*-%`Ts?_jQoY9|#~tW+9CKOa0d|Bfesve&l8-ZONS7rQAIrmI-BEs_aI8U;lvrB?%hi}>EAtY?rZGLF`9Tg4DXJ4 zx3A&0Hj2E@(iG!}pP_+WZ&_aG78ahs07G_haYzi-AuU6F=V#{d!QIzd#-Ue%?H3IdKB6={eAu{G7MgYT~4Sspqht zoqp-}>w4n&M6mt*U;Tpk0`)!ieiQMl{}kVOE_C3sZN#7Z*|vl1ho*C(=uar`%({WZ zTN=f?e%M9Cb={NiCC+x$-eo_84*hV3`2J>*&IHNwDCr(AfyC%@t96t;1!Gg2>#%1EnEvBz0bg1XBAw46d$%Xu+b01>Otn0*W zlvm&0cpt*~oz6ooZWHIUS|7@O>|Gq_Z{hppsQ(+k`@}zA!Y-%eLx=br-#uSjI$w?a zTL%aG8~f|vN6i12_^}M{3PA^d>OUxIYA)$zK;Ah9%BJwVwKK{&HI#8fK+?CY#v(m zpfmZQV7B1@aoc-wCti+$mvj}sApqbjHLGjt)vdCkQ6^~!US5#L9}y>i}$4)(-- zD%kU8om9lxtJ;g?KnD)uHE{ALOiP@VUd8h+3-8E-vm#tZ;v`Y;j>+;shw@`&0CVJZ0gNJqu{5!)B$CSF9tegkx0g}3ggp!TRv_LkL*58F z_yy;V!1)lXGI25&*82rI_yutjILDt=B~F&oHb;&_(0Lw*{H;~~_h|oQ(M~_&ms9bk zRtQpz4}+%7aSj89RvRUBL2kRY8Y>zgTHW& z9Gtdc8WZQOuijtKfiqE@m#uoY8FBine2^Ufpo8DhuTY*YPg@aZp~^c-zd#p~UwVms zq&5575r32+p9>xQl0^BX(1ENqlQnM@9A{&Fgz{G()QR{#40&+qU{ASDPEzC#;w;an z_X~73$8AkW{SUSClV5uf=bc(t$aw%dj;s8wAdv?;wX+ZLma2TD^bd5N{o}UqE-LI- zXy5?iC#_-ETb38PMd$eaPV5VSH{Labc*W}J?Fb$GA>)L!xrP&`T4Oy2I&e0KcMHnC z8%vx`hWs*gHpe+{Jc}>-o@y1XWcva*QtWr^=CBmPWvFHzPPI=h^1Yy1k%FYW8Lia6H`+U1mffNuWbOeHFJzNzLV z=l8VNepySrr*-u8mfzE^rX1Y0P^k=)OYaIEUlG5-T5&xe0{*|;JbUeSht*4)O-IlV*ZQ}J&``?lWog5djZbJPp z_}(X8sfqe~UeKYx3=!us{#yHpIP1IXInaTFa}_Ah`Y6wc({;U`1D(mQ1v&*0XWo08 z^WXgH>~|j?y(WH>Z2EBw?agz)cHZOPHR?U_XE)T3WAYw9&XdEQwGw|K&iChD|JNjB zKYC;bdP2Rd+4ig~GXxzNLZ%r_3+spmjvbKF)X@y_{~7~zT2`l`*5 z{Qx?5Cx;(Kl=_8u37_lj2Oa!?{S|PEjf_K_?1t~8KnG44@g8P|!U>4udsJT@=xmO& zKl$EEOq>kLpZ~SLVjl$cEj1=N@w30S%PINLVO^I~Ib?ft-*8E*jSXIJBhLZ;#gVCr zzs8UUh7R_`IuD$L#nKUHxO(SG_E+dk_U!dCBXM#n|H}G$H$FMvTbMmB3-R;DvDZ72 z@7;R~Zflje|B5Ai-F)vwQ>gMYAp0#2qk`H6GEa6T70 z_y>6?aI&o|M4ab_{VnK1nm^p)JMP1#6eIq^;`;hRhx+3F8_L^Fz5UGbYlLfx1}<1Vyg2#a{hph=MT4q{eIN% zNS|uNTd4A*k_R30{H*r-dEK?RSWV*PQTbarA3=xts89BHyE+UmAG|(lEzU)xh)|cf 
zDXQ!J03G}waWi=CQdvKJ=jh#4E1aha4~E{F7e|Y!iK2rViWiWUyax z4;STaynYDrb5GL8&CtOgcpn{{+a-n*r_gXc2RfVMXT^Mwp7v`qhZ#+r^h<4yY`@T@ z8_3@!u(|%$7$07bCK)%DxEa^!<9g`2<*@a?C7u7j^XWl>_P>L6G-%~HIxl)VRRHnB zpSSta@6d5Oa{7JNwn@aBqu&1y$?tiT-^ZoOKOy%-^ZVIR=FcR4kymlDaZfUN^Zmq~xzVn#EGKlT`v&iqBkqSEZbUvz zyxxZIOF;)eU|t62^s!^asiVH1Dcc2f@B`*8aO%xEMVv8e-687-9nW{)+r7Bo-?`yA z;*C=4Em~i^AMZ|vGhY^>E6I-3LmHi(&tlMxe<#x@-f%`Vi2wt}#zDK>gH@!yu zDTa3#po1Tpi2PTXLbr%hZiPs9nY^77JN(8)NW{iDe5 zGIp{o@CKIuM!W|PZF|Xn0G;P~(7E0o_)q8&Kj!6LebxKJwBAO1f%2|c9-er6RbE}T zJLupK?8|_&wqIo8tW^1P$$<|3m@Ce4r^z3kI3v_~N$C&hc%E@vt;M{xHpQ>RyRFW- zN*;8~^Rw!Rc`L`wIK=y+$|vV7=rC_xOr9ueqH@dBy5Rfwr$qj7_PGSa^)uvApbM#= zjSEOZygBMS1kxX}pWPPg&;sRC5U0I52QN9$!LMVs@Vf7e+lM%Rq_@|h(htzder6Rv zNbf)|jhdc#elcvG>}Sw%5QdOB7VDsdcM4;oJl!m)}y?;h9CSZcs@g(3H@d6 ztgOV}nohqihYtO2r0~z4+Bu1H-q7!$gMXfg?=L-zotHQ*)j2%rALw}A@VDkBdqw9} zPJ}N=ywvKxzVr`tp8eytc8R=W=L3a_AIi|4pp*S2iLhhifyIe8J)7Pi(7_+u#QW9j zij*eK{(^cAbnr)pHkW99TgfU%oO~*OCj9{&--mSGUt092BJr9T&ST;HLx}zm_g|l_ z@Fji^mB*Cj-KBKW9~g(wKc2p?NxXH2{2_Gk2i8^Kl>Mz9abBqN8*==D4xA|BJ%Azu z8xm)_;XO*|;Mb3$pOtRjlsH@O>(|}T$$p0QE$SDlb4%j+UHh4Jv7ePJ2FHI#?zJKQ z>0LHo&UesZzN;Skm%%yS{`=iCtdCIccJ2ZIAt=~@sRW{ zbl@OQ3C`?3-H3D0@cub;HpiK#@CnP8}#Lu=YH^A$(P~%{u~@7~5C+)`( zH?`qB4s<+zJLkK#StbxK)>gZ|j`3B^lOc_-Z@*3^p8sCG{p9$Hd@|};@8opigxja* zKnH&&zVnUZg_Of)6KA}kJkY`JC#UkfIjF*X;$%Cew-a>I?t{g<<>lTjBHrW+dLDGr z?s#7g_BuRk8S(NM@<7nRUdRi9v#;YS;v5U5k29bHXS~?g*`InHaiSVH(Aga4xHR|t zM&bk*&S!Z$F5&N?z6C~ZC4QNdc76ZbxaYQlMBdQ1^iJZxGMv|fPWl=B1mzu`au4xl z8sb3c;2*5B!MPji0C7I&*83Sc_y^|{!MU;PFmVd1^X$@o(1kQFVg3TY+V11T-=fwJ zk`G--e5|X$zt#6N@rN4DF+hj>Vo~KN-^lC#p1X0CT7Q3kH%iP)lXIOX{!YVvCg|WV zyrYeB=6QaJIBvr|P3UaS_xt)+_#My|*RBz#s=Ci4$9w2*&fs@IW&i2xbDMZ$GX7lu zN$KYOzMXIG5&w6UN0#M)^$s|JtNM>aQglOovHsk z%lVQxiw);={%ikv8S^diheX%=$GiUHvTz?3_RO^Z1M$a3)Y}0%*c0m_aBg@1Oq}ZR z^c?8GNh*3N{4tLiI<1}q9XL&9al702I1F*}8_vBz2M+FCqF%EXMIcVGqWbbc zXLJ0l`{Fy45BfzRP9MWPLFlkvs_}47g|lH#2aiv9HxT7Z(WTBf z;6yAGi#TZv?{Ds|rIzs?)zA1G(B?n#ON zyWuz`Hy|E^zZ2HXMZ7r`^gQU~ zJcRpW;7v-Lk9e=uyJxcBK?i@}`{m$-xmA!jpAF}*pfmX+?4lyX3268;{&2oy9Ibr` z;vZrUD4p!3}CobR?wPE&{Y_k;9& zZ{r!{gJ6eykLnYDy&+zL4)J;cpWcTVtpi1-Zr-)}b}{!zm@Zs_1I;T!kmZ5S*T`w(Zp z+Gmq?^VTo;dno6wlmmz#sPE0OC9itDm=^^W6X3R-F30 z4{{^>Wa3ARq@TB-D_p&d|FyiSilDs6OQ5{vA50_OJ(YKp?FKrGkGNm^Pxz?idfnJ- z93PWSo<;ol20zR3al1G#mAK+O;v7h!?}yN#zL&&%L_QG~F-PUsWPPFY>>qzCli=Um zvy}J&hIIsVA@T9v7yMOx&`RR_8P*Zd!Cz_Be75p_;TBb{_1lSlfbU6yU$@j+;-^*X z2-y#y14s7nVUafy=c8f$03G~=eu47rd%J}=Yt;Hd`U|>{?1^)~;P2nQgZPJ3eI*|{ z&wQ7aDH8YB;*)n1KbIkHhYt2ce?U3w)!R>;mg=1h=@;n0!Fm9kGARxdXR4t*(7`Xb zmj%xJd&h_qx{iLFgU++v{H+t>J1H-go+AD{!}&GVB{$r-#oFtzR=m_bX%K+AGSq)M4V_U-!0<`=%gR8uZnuN+Vhln*HwO5+7CMP6YNic zv!_oGan9t`*AF^y@E#jDvwwd>oZ9N1kt`2%;NU(pILC^AAkI6LPmmnwY>uDxSmXt# z<^N2a)9Rf&xz7w8@`8TJ`s81?W_s|vf$;(5yPV1z^=C!^a?g1hro$`QDDJcn=JI+W!1k;wMz+nIs=N#1G|F$LrDcuQ&s%PYG@Z$j^eG zVpUw?&r|2^WIKfp?Ev{EaMHI+NSu>~JTr9Q;JZoSJj#-kIKJie{TMo%J}Bd5`DI=k&y{q~=rU7dbz{ zPq1g`FPVw6*l?d2I@lBO88}ThXCuxM!+I6EkowVP5kKr6n~VACy-MjX=sfeC-$`!N zJ|FQL8`hnC&)NCC*Qe`$C(i2FcKej&l>M=R>W_&Am(F}G?CjwFxM(f!OHJKZl=vmD z>+?#`p&$J!#^v}uOA_bG4m}4tli$xIEkm5B>icoBzTWyB@eA73j8El>KT4g)l6>ew z^7~2g{jbLh-Na97_^tqSuqXD1QO>MYsuE}WE4@9T0|)z=;5-iHN1Tu0_2V0K;P_8C zO6Pp@uCGm;I)?TPoy~DuS;YHA16$W8P6fkxXz2J{smrP*@_{eYH6mU(_3nWjx1dA) z5NE(n&7w9V&IiN3Ds4&}=EjMu9lVss_WM_;==$#@((SuWgjLb(Ee=|Q}`hV>?Nush;A za3-DaO`OH*9Ez+Tbl_lr1Dulq{fYC@&<>!3oqWW(`0>>T6USeDmrdFUI(feDaTLDC zSFO-6;=MI|PY1eok$hL=yM9ZJ`);d%xW{uV^eEz8RB@gxA9T`>hy!4^S^LHiucjgY z03Ga)H~^d@!^RWmWHtSG1syo}&LcSe)=whNFvB_-I&jvCcTX-XokpCSYMm_G6?EVn zug3Fi{Xb_BXOv+*3>`SF#XK9P=se<7H|&2v2M+Q+uurx33yG62mEO+K*&OG&hzF~d 
z5~o)-nf{%tMs&Z+#U^e1$%AMTBT zb0qvm;-oh0=RyY##vgF9JlaB>XNGYZI{2Yway}oved!M3Oj6^P^aFGu#WhvMyIcud z?4Jyw)wSgzy5xnww( z1syn%3#6v^NpgffO`Msj_4a|z=J;FB#s0wJ+vkY0CXK#7LFd_zZmXZT|CDIeCE~YM z?^enF1f3lBVF%c6!ozFCOQ`Od%Kii$?1%T*!3lHf7I8Wl-l2oe=D4l-<9VEoHuWBH zN~?GGWc{GC>xXyzzVB;_cb(!jctpH4YP}$N&@s=?s&bg$xtJgA8S#odvEzD~cY+Ri zr&x2(Wn0#1yPD6P--p?_C5X7Y)O~B&|K+|<9p&I}tKwIQn_x%qIuY+K!av#Pydi#y znELh&9sJxy%wI8Td>~Fuwf`yG3v}owc;6f4sTt`raVn@hpsX)+p8JV2pX0al8}aw4 z`BCz{&F5f$6y<;2JlUpLq4aB^4F3JlUzsNtSAgS!|vec$Po&*!CyGv0nX}6(TVfeu#W~^NdCe+2EKd7uf)%*$|>y$T}XW7 z(ZCPeATIHjtN2#(p@YBDD1W_sHoW$pS(}6Xh4TjB*GiR;_yrB`s6hvRO%?Ov;73V_ z^PsN2pF(Hy*Vunj5NA{){XFEYzYzbSzEgJl5I;t7Js&#H{&MC8`p!#F`~_-#BgYw; z7r^|2a#m}XnK&~H{RlevrJV4~i_+PN^IpZVvOLg%gYU_pJieWB6KAV>2Sjq9gMV*| zIO9~S{KP3W&z^^5dxp-l{he`4{al5JKWvPi?_C_@vJQ*&)cDs$iJx%1K0gK>?1^~= z_KdNyByko^*K?o)C$#YI1K%>l>7?u_{Q@2Qg82jGalI%{oX>%JyFnL{U-0+9?=a3y z{4VNyrPA-vdFFrrzVuO!zasozmH2tpIYk*SL3h)izo!8^qr91()gazo!}&?*;2+#u z0w?X9+QjK+SbswY4(?Ha)1_8@;`A}>A43NY_Fcg#RnM(;QSmo4Xbt} zP7%ZU3_92e@1lZp=a;U;*`~&EX(#AxJ7HbmZyk8V?L5=z?!=3y*2A(NLASP(?Ki~p z*~et)6RqYx6%V+rm?A&7abs`d2dQ{TmKQqd*8mZB_vzK2cwf|gA2~ij2Y=w)D(txA z_rb&|XEoR7av!dwa*S`PXp=5Km}^{Lb(uS!`2L3eBIEVcVjhYxWD;?1m(<69(AoZR+OtXE zG~)Cq_;c|YzIO}%FPJ!s_)FC}RoNb)LwiKL22Q;0^N91CVSfucaIlX9PL!I9h|~Rx zJ`R8m9PCSg6O?foac+OmbD%TXf76wf#98!S9|w5bo{xzA?=NfC62FzQziiLYg%k(i z{UX@mVzZ6Jf28gi$o>tT9KT>^@M{mr=92Ue~)~ac#G7$BzdND z_?XvG&!D5ni1*QuuZ0fw!aM@b$n~d))2*g{T!IevIwju8o7L+aaqg@0I^s&5&o=>Lo_O^1F7cxp_O+mcUvb_5evP&BA#vIo*0s=~zR1IXQ*8QE;*2xQ zH_&+=kG_B3N$g+DsS`x}j)piAI+U}4IEVFTqBq3ZqWmkzP3VvZ!ut+Ae#w`{Z_S3_ zJP`IVVTU4D-V?vNA>Rfa{DS!eoYN6M6Q_fr-$G||+}4H>acN&;`>}7tiK@m++25e! z_Yl8-w^;b0>Yri$`Z4cW%5XjmI`{#3P}HwLlL*AgVu;6}gCG7s@Bja|_F$<#txj(U z?jK!5yxb*a6ygUO`UiCI1OAM19=sl%IOz@N9if9C{HOE%$L9&0 z`RyI`;u0^c+P{?T*xNk7G;wbG{QAVi&s0M{uR&*f#Zv21=Xf~qesbbXAF7Xs@TZg41PNimg4*UE2y;l*RUC5J(I6WHc z>kFOd@z5FH-*}mY_yfA?`QF8sE^ElIoR_FFBnRuCGqD*c&msM+WA>!mv=OyL-9d!EdP`E6-^MQI^Zd;7F zThHt32_4FZ?|6fAC|xPy&K4Q_YyUz8v^-T2-e`|@D_XeCUL)>yVZLS3Ur}|&54E~>D|Ftl9-oyJJD0jks6^P&Jt-k)y!OuY}I4_g6dS&92iKve=p|d&8 zbBbRgS0heM!}>*kPSN>Z!N(nb#H(XC$LsBT1ucEK-YL7*A^r`uUn%DY=y<$!S=gV4 zKc=;7M!b%Ob4Jj?ALxhRtjySoI8*oQ-_L~3<~ZL0_;piT;*3A3e?Qc_{V11JLc}F6 zd^!?8w&9&m=m{(;WsIKQKl zan1zh7|xq`cQ4i%p9J=qLVVLZjo!v5I8O#U+-f_6_`UDz?EoF@i9825lS@7Kt2(ez#~-s?uW zl4jjYyy)ut*^&nx@54CHU91Uvka$N8=fq?_9`g#y6>j?x;%teokK>?2f55v(;GA82 zkvInv={eA$ogvQzPR@x}iSx@Cef$AkNd2LP_>OVfw>OBtZh$^cg3d0dpY`?-pD$eI zzC)Z=fi_3R|Ii`+D7T>P?4C6PgX0gZqf!4ISMC#cvt{>J+0URuKdYhoS=$?%YKHE+ zIJlqvDc0X7eIFBlh>w0efDZm{E!KH=Pd_7$e|0?vI{5pDc-Ja^y_dx4qSmjnzR-F0 zw{w0g_4*C*+b+`E)4Tc2Z4DIpmt(6x5dX?Sn=kt{bjnS^=K|0l;Qw(oJ`=CWX}w*c zgMTnzffMiBH{y&^?~TiG5IXn=b^xc%(lBFw%p=}YmzZObQoF?^urad^%--UmhiTE?y>G|HCzsoA_ zUFHnPO8f@B_4b4g_Cy>EzwD`=lQ?xI>N(JXgYON2lQD5#;yjq4=Rjw3+*Ym++>dhK zDL|YR>Kvc!_t434bCS3}mTp;L;!RnsFBf#EANB`PzqKoi6KBkFJqJ3Q<8R?SGB|B| zl_t&-b#79Qo6un%FRUtha&pqYqPyn?&*OMs0sPSE%MpLo1bsQ7Lw&J72u_^c6^XOf zU^nQ%nU&)WeXpUfzb|p_AJCTvI@k^4Fv^oGb9Lez`l)l1&bax_r<%mSXSg>mJn$4%G1j62|CyfaWv{%>yHM+iDt;hLI=(*vCdnbwh3|48930v{q4PX{I?qjqt@oy~Du2gJFFtEc}U&Ukg-MAi>Fj%VD~ei0wF7}Jw@yA9_lp<|w(6;^z& zut@j5#7nHst;lr?bXd3a`?IRGe%;ues5joPhW*kM7)acq>Rf@WH*|75$GLv+rv5X8 zc&7K7po3p=KL(u1TZa>8p5YuNbf_QJ_29hgGnzQ@)x8l}Kj=c*7j9C29PuvIw%e`j zXBZzn_cQ1B%g3deK>QKv9=qhr@77mRjtP^-cS?ec|K6kNBG!M$KTan8R6l*cgAV>F zCf->~8FvP8eANE697mzEInFq5P^3A;>7MOp?CJb2a;P`+i9g$rpW^Q#yRA6lym7`6=0spp*Tiq42}KDa(l$TaBmEe$c@W$X}y<=d8GzI8#-APjaAxAF%ENr&QMU z#Oa~zB-AlGl737;b!8GR{Ld=51nVe^Sc;%;%_H@YQuS1`CSaeyC`Rvw@Tfg3f&gQr+oC^o1!jY@QsoEgg 
z|C*?5ztC|W)49K~^6?Gg^=fYOq(8me-*Eb~d&E1$&w0S+%XTgOxk-G-Gjjg>#EFza zZ$IeZhmoSc9_{ytIB^a6N9e%8I0`?+4gZWdb&l)H1D(zBw{Xu4oB{b%z9#-W^*u((_imnZTT>(RI`Uwv_r$MU&t7-P_6VIEPZ7_-9zMTXV}Hzd z&G}Qmj)4yT!8?}VgqszfI1f7N{R161SZ9HgYJODW1Xb6!U+CaxjIZD{e;t!J1-C@~ zpDCmtp!4i!w>4uD@0*u@5R3S0N7#Jn2k4|9@a`YVTd!_B;@!6N^@a}h#=b8&11=;Y zPTP$7@dY}p!;`84Rw}zXbcxEBg4f}=HwymO_{oVM_nBR9Ile##KjZs{;KZn(ia1$z z>p9SYgZ>T9*Xe19Gd-N%ZqUJQ@H;rY)}<%TqKbOELFc)>IP>zpu`(0?;hLY}f4{FM z^73n1WF`K(r#4@B=)NKc@w#Nv_h;zfp9mYcpQMPJhdAlF={eAWgM2B< zleAra;@nxO=RgNPrxyD!sm}gRoGveIjvN=D3(3#emqR(*yNVKjp<(=o&NKh}J1Y@* z{QvZ(1o7P}zbpL=o%FNZhaNi6MZ7#6_2WNu@K1o4=i6s1PaMCVdJc3p$8DVx`Gl5x zD-q}KQTq7}I=g<@A93=M)Tu(eUswKLcz)KvYrGHI_fvJ^B|C2OWIh2pHJiGCgI-TKIFMX1W-SKhL1h+iYK z&6oWRI`*^6!ntnPFDQHm;=RbHU*A9n`{8~jI6nJ26K8P=JqJ425Azo|e+=nHoY{u= zN1*dOUODHzZEbrI|B=cg$??`ZzH_~EG-p5J4>!DXCf7Syhr$l$-VY>BPPg8k(3$+w zFYBMgS!}qE#(r^I$-`%*{&D`@2;!7Ut8aJEal7-k{KY$Ni&y_eyd9Zrp7aBBp8f2$ zaK8ffE7$gK;`dkg`(;0Yj?3$^ur2^^cczKNtFPX1k>eF~@CVif;53dml{o1P`B3P< z!8<45y!bYQI8)VqN?9K0z*!}}_ZRi~9O5)L-0y_W=D01lxVIB_^8(_;Fx;Plj`Oq5 zcL9=5Ttd7D>U}urKhwJa7_VTj+rBG^cg>JLgbwyXUI(19QPvPAg5g~Z=wPoX;@#A4 z2i6nkvwF{0)(<+imvdZmE!|AK+&}rw5XL9eJ4f5?#Q)7OPe2FzA&&x1xy-wWGe*7l zE88t};9y@IoJ4W=5vQo(od@V_jx#@U@4`Xi%vSZ1=*k1+b z+l#Bj@$>3jq`!rIQE(nDxJjIKNA37n`WHIS^Nh3KC8&Ov_*F0K`QF77epZD89N)LE z_K^6O)H!w;-$RG^e*Ec~)kDAAqT+AoJ0H&io)UNRIeoe1J0CgKMA@>~o6&cR9S)A~ zyNLDI)rCRCA2!?O%k~5v+5!Fy|5Y#bhB%Y@d&6<|^Ye*65U24peSN*{=lFXlXN^st zh~Glp2a)XzI?wIQZM7HqlvLHf5kJNTyZmy#fKJ9U(ymJ~hZ*-{o;9xe{y<3nX)XLb z;7oYp^g5*X4|KMl-PQx~KEdd2k%==wo#&ACgU+s>d7b?tWpv`X)cYior(d`GS-*<& z#{I+nO1yZ{?08l7L+H>CPnL`EzDk(&s-L*6N@71U^VT@Ty%gg{~)i4auv;xl{gCw@ApFo z|4c~6?`fBNl7l#p)H+Gp4?54|mUG-avp5g&D;VM*_|-GtZEX_y$NMb{5I?Spf24n) zlm0<|7Iuy9Uzm6q4RH{3@Xru&J}p_+;>2+q;#}z9ALLCQJgH&}XgS>oqc=aFPThYs~coR4z8n^uuHcMR`vKnG54aUSJS17G5tF|4zp1LvsV zbV^yBI1vna80f(96Zd6D+^tER&A$5n3LQB3P6F(+a#3C4OsJ*jKnD))<$>dB*MKS?lUH=xu*J_`f4wvD)~NNNocE!_ygxlr?_%pN-z$uA`iOad zy@qbb8*2rIJZkoBuCJn<@|$vA(Zp|xjDo+W0()2gJ00E zz$tfR0dbP4{H62@bnpxAsesdH@)F|Ij-~etbRpTzE$(eZajzi$Wy5=e(Anj5enJ)$c0xJ4^kJQNR7vZ+rFY zuYTR?w}|@9s(zEJ-Ucv)NjyA_3x|Slj=86{jO5Kv()bx_1jPVwpYLY>esD)i>TkM>NmOijj4Vu z^&7N8{rl?or26gGV8GJC{+9JJl#d|6DqwFKj zR<&;{;|S;wN2Ke~;%&KduY&VzShu5m{SO=@Zcf8`5<1iq_YuLVKKLkc{xZZ5(1C+@ zn!u@1>LhWt8S>H4*&MeOSmz$COX{UON1V)6?fxdm6X@hQSIko=SGWR~h}Y+?KK}=u z=W*0+T^HXA{L}Xu@zbbxw`6&t!?1z+Clc{t?7Fvzw^7~mlRW5{=dwnL_0aIn_lP&! 
z@J<m$@_@$EOn$!&Pg4mxnKzXVS286Sv~sgk}t(1C+@s=?`0&6i= zN?L(vH(SK}H(z3>ByKEqe_FO1=x~m>x^nRKbnWv^KD;V;9!2~P|J=LcL;Rf6_3axv z_!;>TaH`EsPn<}GcO0REpK)IUoKRCT6KCxLeLIFu&VN`BfR}SbHsbC3Ti@@X^F04K zzXM*&Hy80&4%FLEj-S{ULOGws%14|jsy)j71|9q$_qXbwFG!r<4E+{5aPW>f%F}O3 z5#r1<+}no^9DElAoTC*=5a)@ZeL-h)zQ4a#ozL5@PU|90eRV%x_5=rHc#du;8W{N8bF?&HDZ-bk?@m7q{V;x~A#w<~n;55_OpVNHyt#3^Xm@w)Ug zbZFnO3pky3wII$~buL)e7dp@ObXoZBG5D=|w;_H5!@FJ3p&e}&`|;!ZbRf=Lm3Nc# zCUi2+!1#r7CC}T1ctyYJ{Qw>OfcGW9Iryv_amGg0`vE#|Fz$epZ%!}bBut>^KnFWb z6!+(+b?!%;2I=(cIOsUOaGtlR7j*>jI;(T2|E+!aS&b|4`1-FPsI__^Ea?EoF@86d{L3xCZcPIANg z2s&_ZpBsL8Szr-ydK%sdf({(FSf{T^v5YuF4eLIKlTU3Rm4lyy1)V_rjoy*lX^EZ+-DzzRS84iQB>4 zVf%;|^@jf59&~m+o%daqzCK8tA)9QDY_IaZOA*!oSJlp3Cwrtd!SgKEFzxStpT$byW$)exBoqK~gJ}2}X=-?NOE3oJ1 z7 ze+DNY$w%UJFw8g5p}v?W!KuIeGjYDEe4O+PbRqRS{5|mB6t>3ym^V!Oljm9ciuieM z*s#R^xW%@ooL8VzZVG<46YCL__vpe1#LJ=b?{a*F4*tPAp5RO`7lk;xF6sRY9XNPr z8k~1;q7x^v;r$Wl;AiZAgVT7|uf*x1-j|VnhAt#O-xJ?Q4(c42_zTo~E0PbLXTIBd zTb9?aInpO2etor$kbZ_v`WfpT*khZOlz0c#dP4dcI`{|g=77_FYYO7TSLYoh2RfVM zXRRs5=O;Hz@F9*Xy}iDX>jmgml@0jczN2MrD_5n;k{veJZTUZp{4QfB%R>1UxYHAN zSt|Xy3A%O-TQ_NwJ$n_jBfO^o-iXMViTA>AUIsdhkJ*(&p3QEZuKdMC!Rs5WFTg*% zFDvm&sQ1UDJ)whNunz*x(7`#0bEdR@-3cA)i*-scr+HrDyi@llWxIyXbGvq~pH^ln zNc^&@od1pQvZ{-BM@t43CcdTa!Ag5V2Yce)AnY)9V{zg{Ro|bL9O%G79u1rm%Ssa` zy5amHbSA&}4Jb#PxIbk#w-wa-Qm#xt^qVRrDiObiIYO3!r zO8d$95c}=0U$R~`i1RL#zFk8HKVTdK=W^!S#K~p&?hkY}$8B{L`#z~|)g#XH`1-iw!%rcxegbx0~d;?C8io=Mr zNu2|h^@T1Zd*Yoi@Uuo6Mf^JIU0TV9&NKh}`(I+eJ=*Ot#J{fc)^fasPPr*Kjz>NW z<*hM(Jn@!&*2mw_!9U1%fU~~DB;tH9yypQO{DXBYIN3f&&O(tgnKe$@Bx@``*|mZfuh&X_U>x+3?|K@=4Bf>AjzCBYvc1dcJq# zuiHv^l-E-u`UMjI+8moN+Y59DS5E%ldINUFy=&O@WAwemdue!=5;}13o*Fn|7aSl? zX2W+GptCt{tAoflPcC(YI8Q?B^8(Ou9O3jw$@j;J_y0&c>*%VE@9iI?xVw9B*Wyy- z;ui=IJZNw&Zbb?NcM_aJa4+sq+}(@2yUWY3GtVv0K6Ao-zwKK6~-Q? 
zjH@_u{|uc@*TpoV=MlGs@e%d>)b}=Vt#pbj{Q@2P#l8=ClJP$AsueWX_ecAH6&D^6 z=Sn3rht~}*3-4^BewRi)BTf~E^Bw5mM~si)tSI}6I1$^L%LAR_*yHrShJ8nz8a)(8 z_A~G1Nqax;^Xwz>lj{5;$@i`w+uuVty80{eC+j>?$(QdT6xA*9aN><2_fM_##7D57 zhMlL33ODKdcqi_rrV6DMEa=Lw>Y5uhf3;E>-Ii#Ov&kXUX^O?CYKpKbI!XN{8?D$aPOW z?cm;(vbTu$Vx4E*gZruQbC<8>h<|yK+0W3y&v;K99M_qO#F;wP%z@6y&(Ef~iSy7Q zpV>P<+uKp?RldYOoL7}o&S&27?S3zQ$dC996PxY8es@`zk6_O;W9k!U(Ob2jk^Kfb z_yzB@fs-z$KXFEWHgli@2l)-)L_N`zIK2{>>jj;Ye?N8)AWq2~|Hb}pD{5cfcSKCz zhWOF+`M#X5q2u+f+roEXQ16ak+Y|3-EmdC0gHDdCxHkk|%5|NI*V$p80Ui8-cPPOb z)TSG8w&?eTW&3~*{=hmD99Ozt#5u0#ec2zN3q6kGZ{fWS@YlrZNBs1?%>IThbiO^` zt=#+p#P8~`PM7&^h^wHSCz}0EoFXI5?HfAy1?y69hSUfo&Pa#x6gqH{i}$#5BpyYa z@k7kzflhJU)~1=4>D=eZrLn~E)%B9?7dro!X%_$A{ZEvuxOf-k=I4pTyD`pOF6b1; z->N3=t#sWJM4YK@%}SMD?*($46!~>1uj}ew z;?*c-wjXq`AJ#44bQ^b&IA1H7InXJN+qx{`q(jOaCC)*tmqt{12>Oa^AfD@_fb>jFt^h4<2|NOy+X})u1zD*oo9siT#EOfIk zwD>1(jsA%9M3n2wjeErF=g=Ra3*A5Ve3s2K9})jvcXPY{Q9etI2G5AI(BU4tTz6a* z=j}_AzamaNhjkEi@Pn_IpY~jPOPm^d-zxn89rr`GbxpjpzW3lq;#Hbv?$^-i=IdF< zoJ`N}mLy;Lm3See&31{SCUMRLna3+{&&?Nz_tN9% zk4yYo`hJXTH_*xTD*6fRce-6d;$5y`_A_*_ALc7?68R)0P6Qt_2Rd+YUIfmu<0*)9 zI+vLP9XPm822O>3X^3;Xw3!1PIN?QHGg_|n#7R=y%z;jE{H#afJ-E4dGZ81ah2qG# zCUl5vj+@)B;(|MsIt_GrHSdn*?M za}j@We{=n%U%!g|M)UIdh-2w;%Kil%>Wg(8{JJATLE`N0W3DfBq0cw=^Qbs`ix7Xk z!+7N_ALAOzpJ!MJ;)jeemmfO#E4B95+WEox>P}kg84u-p<6Xtl#82mN?;JX`dwdrU z<%}NNMV!qu%zlSXalU=$Nqi@wVeU%AiLc+!mg5z4JYKo2tD`x76`aIPyr*4Nxg-y| z+0(vXzu3=b^Ixbzyn=e)DchwypT)iy_G&h^^Ias2i9gtQZMs(Jmp|2-)74SP;CAzo!2*N{BunCE8|6yst2pccfNq320C z9zuulaOB9U)u+dKpxeK{g?poduZ}(pq*HF%z(|Z!X z_Z+i5p$mPywco>h)Vwe8=gl|UlkZ`=ES$HXoGWq+AkO&3W)5`l3;Gc_y*>;kPSoXQ z4s_tG%kzrj9q9@O5~t8IGY2~O7x!gQo^p9d5hp~)*Q9@;`;q%z^(~P4KIUc6X#)0#gXG4bUg0)TRFwOsZ8bO5pTP$pY$hmq5IQ59<0o;koY4U;)>pl z2ljLHM^~2+e|34YJ)F+bG5?|6^q;VTc!eC|BhbO$TSPv@`SO1fCu4K7zo7%CtLP5{ z!>=dKj?QKdbZ9qNzo1^Jc5Na~ay^gBb^{%+d+c$Lkb&EX_h6{GTymd=_KkAwD!q$1 zrN){$(7{ei#rvpXo9`vgSBLljbl~9p6XiLd@gQ-!JLI20r#SX@Kls)W;=Jgf>LvS? zckSNaS|z@BFk#LK;_q}gr-Lr^e(Sb!iu{zF{%45)Ww0v0Y%kF1?SLnKgYN;ru7lHD zAl|i6WmSCbXfPFJ$>#$yuVI+*8Rxmf!)IW`IWeM*G2Y!=&W(%J*^1K@Q^-bl@N^0#3b>5sCBKA^#6L#j(GKdF^8q;uO*QO<7-W-@{B*i{H^r zzA*;z7dgyd&~cuK%laiYpPx4E8Haew9P;I$bFyEV!U>3zQtw0MIKuX`$9qSoPePov zdjBao>`%89u$1EwjUOc^PU{io{sA42Bj560FJ+*60R>m2Cf*A@Kg)3mx@$8zzHvN0 z+kImfAL2!Ks3&x=JMvG_Zg&^VM4T#mUoGtgofBth@@&M}IM+PBNe=SJQJ%PWauO$= z!}tcBDv#Uh(30={=3JVWIC*uwq`je&_kMlFyTa!t{Y<=B4)uc$b~;*<^C5g16(LS? 
zhxh<=;9#8$JC!X|f;bO!d_ejQI&kE774w8CO`P8+oBakII5Ri!zWB`Ea>VKHP#);O z!TZ3dSENxDiIYjkC1kyz17~4E?jO}^Rwd3bhxh|@;9&oc@{~wXgE(g$;t$Y)gYTe$ zQ!-X<;_PsUKR^f0Y_X5|?NVLhq<84A&?%1ny@FSNG$hV-J^#yj#@qAV@Uf=S{`y#( zCd8kt*8|dz(4oG_|AXB&=WRip;6>*42^}~XXTXX2r4@00cZl0S2M)#=aF*_GN1U>& z%;kYjaopArao%@gVkhGGtx+5~K0(L(Nc;K5si9qo7qU+Aq(8ll=k1Ea{VCjnp2QDy z@F#SzAL6C3U%@MViF18`*?!Q0gYgHP_sa(mXM{sODRl4y-Wvd?XS3glGeC2sAE4uP zu-y-kwan?nm(j>|4Q~FljFF3;eFiFWWD4yiRplnMGXS*E$P`*T5mZ03G~; zeJje$YNx_wB3QT27qT!RGow_oM!K>fCDL)t_bdx3}YDSYO_sXPdu{ z_(>h&7|>x{sv*{i^=ED(&TP$*eueJu9R;rU=y%?6pX<|P8}Z`zR`r(sj`u|_3-4XQ zubb=aBF;32@f$kSFSGdG{hTCwi8H*t*{{&?`uyAbSWWZMdSmF>1H_x$&CG)?XT5gc z<$K$|U-e?r5#p8C^_2E^`hFF@3jn*N8*qYnIfk3<4;}gy_PgLDE^~%BZ*`u8^doez ze;v`UnuNbVoVI!#mi2><`_;Ggj5tSLz2`FV8tOQi0ID7;+4^PTapJ| zXIHgP5nI1^j3W(q-y+@`y|0w(Bd0i0`Kjs3Hu`Oar+;C+55JU8aF6&$^tdAZ0v-H| zdn(|R&HIQrO&!)*(80erZvf}>=cmM(uI~*>`$5O+%WvcGY`#wsX8TLxrF4j6L&rS( z`?{59y(L~|9bb|DkniiJ)Du|nmIa?nrTX_Bd$f1de^Qf=#INYE&VUa7z;AF$ru|Bs zWDflnI>qs``UM8l_;lxX*r4y@j5Wur{x9RB(1pI5-N$+)>7T@7HwKxKCN9g%c zwmaxh&+6iRxrs}n5NDi2`-Ki1j90MN{w^_ylgJ?+2pu>R#5q*`{Bej=%OUO#o#MDH ze7_y#Y5qPwamML&i0lW@$#H3-_@3;zZHb9DUXLS^2c6`_Zp8bl?!%H3uZKe%9=dn+ zH%z`5*ihd;aa&o$c~j{csfqVM$Ejrbpwr#SbKZpU3HA$2=0m)z4sjId;7{zk!1)j@ z6LID_#8IGA9Q%4b?)j|5IqWcwd$*pp@88bO%t`!dIzLX1OWv&y-PTg^eew$Z^Af+Q z{vMF@4|LK$cy|YO{Z#H};x*Iuko_Mz_y_M!gHtI+5#qFWh$}&d{x(^BPi5$t;>6kH z(Egy~^9B2P$mc!35O3TD8J=e?TW-1K`Z07){+Zor}4{W;X76+=lc{LiJwIGZ`nSf zLw(ul7JMQDwj~q+fzOT*Ymi@V* zc6@?2Pu}J3u-DU{(VyVAMcF12-zPxvW&4H>{>Sqr{QzOk^K)kwBylM<0_da&e!eO zZTX1!*X!}Ch}&6n<+uf%jDLj@yub6TCEoK$Y95k2=p+wuEZDE}rwzoLncB>QPL8V; z#kiXH)E43`^)b(r(7~@GMgDKljUB|bm2GS?5f9aA_T zBf7XxRz1&t;)NtMmkTo-aj{SalqPmxelRCe-zTVw0cUuGhIz{)b*A==-{Gqx%%l-x(-?z5!Lmo!GNxT)w z&GnY!C)zpu@OkYW;xu*OKnFiy{|-)yaSw>|PPZ314??FnZtJ6n3nwoAgg6JXs(MK~ zL8qGAzxS1n3*L%H&xtpsn3)HihCeFd%%^c_y$NnBc!k@DfXSk00 z%ldlv9)aCoH_qlFe!-3Ae&}64cU!{_@io-a zj@v?B9qJWW*F~JVZ_M_FPL6wsAA=VuSta7V)!!kL;~sRVANIT8B#Z7Q&c~SM@1;No zPFJxnD7?QWaoT(}+X*_wvA28I<2uC2T2pakzw)l#yDYp1hx$&r)PVS-^m<6z4LbM} z`8(h|AKsWa2~(K;37z!kak0-`5YU`>r}TPRmJ2#3KlFOkk~n|o^u`Y^3-b}`7cHbM zaaQSdfGiJm@F(ueg0nnVN8&`$_h6+zq5J<`uQa*Th4?r0`LN`ByIy%H;+vzobSHj4 zeJ(4YzO%cImR=Tvv%>(#MyPh%z+Mm zsUY45%${UCaXK9{bD)D?P=A!C!j4JA$$ap?*v&r9F6tgk{B+m<3*WxZpObk8@uxje zeA#cD*7;b+!5(98&nDhj2R}mx|6rW}PL131h*MgBUsH~2(1Fu@Hm{$`e_2SJsSfJ` z=-}raahQ`c;xghiJ8Jd=bUVW@{pURd#GQ^lUfU_~yZ12h-BHwcam!W2Ke^h>hc5K~ zXFuP|(ta)RpLR0y<#|9QJy8DoG_~r})xe(7|8$4iY$T zOKc_1o{DA;bnut#mmkyZBu?5A=K4bSBYR@rLpgi?w1@cf9O7osh0eFX!}%r70pcIl z@lDw;@eb$0j=b(i|3Z1^#5_X0s&UMAg%18{E7p%4ZXYL3TZeN8=)g%8vXI^f3A^?* zaV~r?pF2PYKjR%})T_tu=ZWL{QE}wF2wmuYu;*1=^1V#_<_>)C@+$28a%Gik#1GN$ zxXbd(epyWS%iGP%wtsN5sVCn2R>WO?t#FI@B^}O>p@YA$9)X>c$Gb>_IA!)1bfNppZA}=FfZ{QSPdz1m>kEo6`wes)Z~pdO)Q_BZmg)RU;`QHS z=0PXdm3TKF^}jjwE%BBwGush5_yhNRz$shnBXL@{~;2l?jU9FO~zIOn#RInaTV zRowf0Y=xWpecUwvelrI;#rgK0zIdO1$X^kNb6}I=NdG~{c~SQF%{mW?LcCc!&E=Bs zn|%=bz7wTm5a-+hGY2}@3HxZ+J5Izn#Bn*CBSQyHZ4ozlekeY1YMwWj2Rhihs>p9| zF+MSI;$Jb_8#>j#|2=m|5WxMV&FbXD3wGc^H|X|=ztcTRtIrAS@0s=Km6~`5b$gKG zqkPY-itdPgi;O>Vbk+_}yb|xA!XI4<`Vjx2L;Hsg{=m5qI4|C1B+i6=W`97ZIQI2^ z-x93-q68*xNnVe z?TGd>aYo!QbD#qUaei%$7fAgombwRwRl_P&8;&)hL=0g|yeClWI7w7Cf7S|&F!Dosu&)K2FerL+x=RXat z@=EtR`?}{v^}57$ZH@MieWhLGx(EA_Ze13J4=T9b)332^h5u%@Z9x1#jw-(NA9QF3 zIM)MbN1#7(!e%qK1LzdTZLJXRs2#fAlsH#LD2|-hpyP4Y-&!fYC%r980P)`MH~SsB z(Ea}HK7hD)y?jt>;&+Z|9w(sVdo1>OxXI&o#JgC<+%M%kjQl0|BiWcv#5p?H%z+Mm z#rO@*&A45OlWn@0106V+Pr>n@(StaB|1xu+L;Juz3UIRJ>O-8Mdx|652Xx#%{H(%v z_&g-b(f-6Msq>sJpA-dD?hTTK<%x_Rcr%k8KC z^z2s 
zX1|xdwc~i=U1?;tpLh4tT~<5&=0FEOG!X05LEGmLCy&nWkbZ!U`-j`Yy98(-dAo-YZ=VAXI_9~opH^~z z`#HxV;>~Dgt|xS=p7!_5OFdXhoazp7I{BWtukK$jc3()7CEHq0|HAqN^{#tpCGiJ3 z+>3<{ens34oPzV#5T~fad*;xA6KOMlKQ&*I4aAA4-xZMaG<2{V&WTW-Mk%)tXGTwD zf4QE3?nm=ska#ceb)$pCUo=SZW&49pyTfx|4C@1ww@-!m^(7``=UjUpk&+icD*PCVz zbc*A)a_{H&e63Lrh~xjzTtDb|+_T>sjhOif@s>s~_Y3p7#cjoC%<+S|$DR|fk3&8+ zbPKW`@Eek@t|xx5Nqiq7@6^}Cdsx6cEFC|>>iGQq+ z;>+<1I`|iHQgDLbej?7yN@fmp@GrhM3(m$R*0k^AaF-gG{Q(`v&+K`Izl{n{yqRsy z_VaFBby%4~f59N&hBnR=IT>dY7`@6Gi zm**jV()Wrl_k;4?*=NyuZGX3+jVF$SaT4YJ>MlUsd|}M(06MgTy1E^_+@0agn8*Ld zaefzh8W&y^BL1^P=5_!b+5zT4l(XIRV#LXp%glie9K53pPS_5mh||PjKLZ`w6UIky zmZvUDoQ|czGPWc^-Z6tUU3L)lhud-=UN1RpcY0yiM~|AztcoX8S=0`(eHU zXGFy6#0eMA%z;jE+!nsm0?xK&HHlM5pJ&VZLC5PD`@Z}q-#WyLpV90O@Al>Pc{Hij zfcVjhnfc!N+x{M1s*C=_x2l=>GOzn*?fCKekG^=Aahzx0ct@On6&l-&_-=>w33Tv3 z?(M>FdCRmU&Si&uMd;xF*y5aU!lyRGN#9b{SNb11xz0ge2FkVaUI*e8(BD;(<2!Vr z_eYoYOuR$fV?h_<&(-$?9eN@`R5&>XFKMe``RDh zr?$`Ig5*NPOSV7gz$q=x`DZklPMjkx%pB;@Zm|DDy{09aO`JG7jxXB{blh%S z)`t}we_JqPF7e{&eY>n5bSPIW;lHpx3y70?gV}%3N&mrqsAtP_i;3sf=i#zk(82E5 zr+~Ap<8tEkeQdTnbc$o2hZ4qEO`KFw%%>+`cJuk;Ue@DJXVL%A}IJxH8q4(H&|frEVqICrxj zB~G*9=KczjcB*!{?aK8=;fu7VtmHbuiBv;+=!lmW#Z&ZYVId&KbM7mgZftObCoy|YMMFFp*+|>fRpU?4dNsj zZstG-4&KKHr&X^z#EGKEC21e%6vw{Ky_x+1ahhIL<&o&k)0#P6lY zDcL`vll>EZLA`eeJSSeM$maeD9qfmDf#7_M`kFXH^|__AA9RZI?R#kAUUk~@?}#%| z$NOacpyPSVZQY5g^1@NB3qwBv@ysy=*>EFly_Bz}< zhA#Ae>Sv{{%KOR)KH-VqNbf6U909tD+QBWuC9bu5;0zr{a9I__d82QVn8aPF*JaW_ z(815}JL=!)Y8>Je*6T0HfewCt*oVJs(zQ|o;uJ}!`ib-lbR~}RyBCPZpN>(80eLU%?qYJ0o#|N1Hj&!M}+X?xuGs zMrO!LoOY`eNBS2!&Tp~jiMLswgLvC?UY#6Qyf@N=PM<%!=Wfq6cFZfU3ZDRa(tdHNmf z1G^setxUY0+01^1&dEPD!c-&9)>7VZ{H>WHZ!O24zQpP5aBmp8(B~)n{-%B_KjJrT zZuXD2>(sC>c>W7YTc7wb9p15k4)#Pm6Ml)D%AYtP`us$;cj&;0DEi-@*P0TihQs*~ zbl`xC^0b~7K%5>r@8n1287;oMv)Qi=arQXmmq3T|B)iGqON^PQ199#+oc}hI0VaT7W@ZnhQs#VIqp5ii9Qb3cF%^}~Dwdp~a4i#Y9Om^siXj(y%K z7rj4mF6j4E<#^=Xykj5l+%E9nI>oWShu6VpJaLZbcfRC0O}>X0Qoh!hD|v%F>okm0 zC|}W&lZczn!5`55X#B)J5B$Z;f{DLIukU4hg6>EB5@MZMt=SCXSJU~Fk`Eo$nF(~q z9C_};E#D@4Jo6p;2g?68;T+0b7j4PS{fqntV30v(SxcK-~_5pKr!an7}!%>CP2 z|KRUnzX*RuAby+K|Ba7)eef?9k3#%G`o6SmU(mt-_^wablXt?MKe^4(|NYKHC;oAV zyf^8887Jz~B{p%|>hnT5KSGCg^dN6R@_&kk@rkom$7Li3I&csVh217*OiY|)4(oI1 z6vxjhx|h$3{ZA$%PKKoBaS1xaQST-z5#!3OI65!V&*~xK20QYZ`PFR=5^=SZ1@jQ^p^mr6`5HPNe_R&cfkOFirzk+Y3?0mN zgbx0If50hvsSt50H#2jfQyjN7t2uv9vEjsG#5vhsab*3V9^M7;vv!F7mVIPd;tkXJV{#n;9r|1BFpG*VyAsZmhm3oLu-`8wD-d_S!+sSy zjH7t3du-vJB@P{2bl|MZ!{2W|;oFipeJ7bY&^g)vRFbyDnK4~) z{x|!(t?6RC8x^A?@eBT89uJ`7@xcDBug{k*#Je`yTyJ@w9>3v-kvqE+r+qZmX?(O&R<0obkN(;>wB?9g7I!#Gz{71WyWy3Om!bLM%*ImD;Ik8@@?@tch^`vp4m z5A4Ii*&IBYI9na!_RxWY?=FJVxWRbhTpnpI4|K4bT*p65J()OxI^HDv8FW7y=dl0w zl=FTt@izvW%LyIodrst6XI?dfIP+(kInaTFc?adW)ng8EUg~}?$4ls7H;nV(gl!c< zoFNNTd8FN-3*Bz^bA{Ep7ZLx0K1ccA`1ZJE()UY=-&CKA$o}jUx7;f3v8Ot}l6W~C zzBdOQ{DXN3c5Odv4RLM+nf(l%;<&A>BJcWcvkkS zCC^9KuiZ!dg%0s%=-_{xtD~G>`yL|BBt4GH{t2B^|NKzq7;!E;d=JoDd*XZ$<$N0R z6!EKsnC%H&=>6Xw?<{=jEb*`C_8{#b$4%r(qn!OhE)u8P5>-ygfewBdB-WXG%3dYT z^5td@bl@QG9_6XB@g{MeIqa{YgMV>f9h~F+?h+?{Y_t8L3*G+q@wZrphr~an^MC%g z@z-Tl7Vm;hPW_blS+kq%2_5VSKfw;WkG~*JV~2P)bl_lp0ZymgZ-~=CpU28}0-fUc zS>?q%y{qE~;`G+#k^5Nau#auDJ@<~pU&ngJm(Jq*)I%$NA#NgvJRRuJZm@1aJ=47n zGxPhn=$K6Ab^~4L?Z*Dz+wHkO5kGoivtQ+VZ-`%@oL6c_CeF;tW)5`lE7mRGWRDV^ zI4$+LoAfJm;1m(>DaPIvi#XBSn#%(nIC$3yy&{w=*lfZsHvsYPKJA z@B_wCaJHSzPn^Lze@gZ*=up4p(=SlI?VhQHh?8W1DvzumbbKGhZ8dL_lg<+c)-Oi9 zQM!JT2c3)$W8IH>)=OE6cm;GlB@a4j{|_rTK9TZP8RGfs{lByubWZjkySO}Y0vy(h z(*Ag_74@6lsWNf?m}9m-bi7}%zn>LQs2cG`%{TL$-p|54Je03Y_L{_-wAkEUp;Pwq zx3HcE=fwv<;-u62aoOLY!}wNPJJjb&>e~a-4)@H{SkHrBb60)hC(-*#=~w9BSFGp3 
zDKyNVILGz(N+bt5)EDCvIAf|dBTh|+bvksR&-eCqVzzuOiGS!n?%Qzx9OZxdwGHvh z>9~jV7j*EKkM`H6!s)lRpBv)w7w!*$fB8uV;uq2FUfLNt_zSAb!RaZ-;rw@>I4 z=iB#egZVqc<9l}}P63CwAawlxuG{);!ets)&y?s*yk&ZPkmD9~e2?MVy9y$1+&fZ# z;th0&8$&0@L7W4_Zo5wmBwiXFN0Ro34)&iU_GeM14I$1ohw%YA*gu7c8z*Z%oH+CJ zK2O>UIyny35pkHRbw(2}>}vBk2%Y5Ny&c%=vCnwoHPrihSw86gD4d|u`JO%W`Iy_P zD%KUFA5S7)JqLe6C;f^25Xv`gZ7}gZ>TzHC6FT@4d6M94={JKo{to+g=)lP#^2Ae? zpF^C2E6n{8I>qs~PKkUm|EwXzNxDjL_XFuy=%gR8 zPJ{iH++Rw(af{6Lh7R_FU%=`2=St%2a)@6;r#NowZGFz~`7&V*aq>CDBcYS^D&Rg9NWxMw_j)?bBP~W{Rj}zbDA!#~>tr6yWK?lxg zF|KBf8Iw3U9quPU2Tna<=gP<85a)dZbKWd;ieq2jhbf+bIJcUa^JdNKd;5K>2tOqu z-sCCfa>@Hvus`gbazb+Ataf-mA3E3xaaeHj)k#g9PfN_@fesvS!C7%49dQ!s``kZj zM|dv-oW1=r5@)V{A477WQ{}PGtB0RvAm_Dc3cl zMSgdyd%1~o-{HIpI@mvx;53h1fH(_vd{34KI&kDT`ejxj;#8_{?#Ix<{!herQGUx- zj5r%~{-U%$bU#{mJZsDG%t<>-62FvQ56X21bR5sL`}0z>GQ=yq!(4CaPn;v7{q~Jo zo;c6Xn>o-q`C-JGO2irU)XZT&xGdydqdbkqyNUBTy!pG;(4jofHgdl9x^lk6N&UrK z9_Y|65cfiPI{o2CoRiU1d1SkQF7$R`A77^zZ$Nzi{pNDY@fC4Cl=J9Qf8uO&c;^o~ z*v&2O+g%;dj5yDJGnWTCa9WD?p~JecRQ;d>cfy|2>tnZ7N5oAE{nCZF#b2At37yOfVe(5w`cVtPR@^J4s`GX-Z25^WVL?8IUhsy1L+6oIBsa4@7_fm zNWA_j&Hc)|`Oe=85%-2y-~65U=d+pl&>>EoK|5y7+lhKR7x#jVq3`^W z{_?Y`ihkHUOfd0A7EnCd51~UpEK@t|mlAE`?tne3$K$x!xo|= zm*UIz4BgKM!bBUg10RdQPhG|N#ZO_k6Yq3k z#gqPq4jehYH0bgdarWtQNe*=2;GQo0ePGLe;LVe@{;yY}I-;*0gmfC}e{f6CXq&x8);#C(PNUN3!_ICEZ`-@$|~^nPN` zuZlVF8u2ULG{65T^Q+Fdc-&mE?iO**tTS_b%%k7Z^sZrf_632< zdB5?dW>n%uFK6aKcX-Xk#eYP3rQ^@vzArA`;kfWN2JtS`HuIp9`;4n%o~isLF7dk6 zHQNn3*uSLUWLlq)IGG&28xNi0*vIt(HIfqN@l$jA_ikLbj|Y>Uryzd9sAl__#{;*u zIUJ9hiGtD)FRR0O5_HlZ_znsBL+aA$iT9I3zASX`2l^Q}ds1X3&O?WMS?JIoZi)A6 zx1Y>LoJTIRy`WR|!@Uugh5J@0*Yw|W5pR)hzj8eS9ok9jR4=H%M=6qzIB^41x#Tzu zUFh?weLsKc?a#!Yt-n_&#{sAPJnV{k-(6RPcy0Qd`x$ib>m(63_3K`OI9-OBIncp= z*!Q74;R}@}&R=8A_Jb~T``P!Sb$ndJe>LU5@Lg72F^{c&RFU|LhMVmGoyVSH+=^Yn zO`J4C&74VE2M+FSdF)x*mpIdPKauSkI>oWSV-WRiZQ{(ztI8wWo%tOD`#N^tyn4j@ z%i$deUdQ@bS@-iiHoSHt;+(9h$|d*f&|$ysv!F|G(j%Sq{AmAv=})nm61RMF#g+Y7 zzH8D*52OzsUoIH);66QHx-7iM4ZqAy6hQp#oy`7)4*o@+I5>IYv?0y|hjS0;6vuvE zTjpSU;xyC#koEQMyw+{S68VBH`gA7#%Btq}10BcdT-IUn&VG(O-H6xDVZR0)?1%mW z`vpAeNt{Ct_a&eM2j_U;WLemkIOiPhOF*YM_IOsU76XVgL*JK>{mdNCviGyw)qW>l z27OOL_J7&W`iu9vgAxQ1=b)Y^WIuxr{jBS&C40s<>E!8W9U5_d`HCW=h&x)x^<}$* z4)IX*v$=I{{Z(`Fzx@pPUGPh}FJpDUQGOK%D1= zv=1VVPdmkt^@Z+7^V>DSf1hbO@kjME^Pvl!@3O2|ybt;1{w(5ma=6a|9qfsBMq$sy zXXg^9g+sf94jkm8gVQ#60dWEx)-TX0j@yd*F%#Xxc;vT)I5{2OVSw)1OnxUnuZYtu zth}0d5%oO`+3%o3d%?N}^_w$v9dVX9yk`$x=>D|7(_J^|CgLamP3>#sc!PJkW~O=H3?2N6@fDm!S$7iW@oqB*I&dNhKV1E^n>exWn>o;-ec;><)~VPh?DPynFF2T*w-(+9$X?$?U?3q&%5=D z%R)XM%DMgMRpQTg_-+++sBh$zyj~60^(JwCa)?Jjhx+2456aUx-d*Ax)!$K&?FTx{ zn|P<~)7F+}d&CRY^YXX#l{jw?ck2Q1PscIOchI4}=#QRq4u3+NN^#8`=oH6o9U9Hw zF$u{1f;h4DePG%DpqoAI`|;V{|59IhO}u2q%jw9an z1GjZXoX=PFPf7ek{S;re7wF`^5BWu?_khZ2iTBAN&IBFo_nREw;$$GsZJk#q?FSt= z_%1NY^X_d6ig?$7*9D0;*5O_vbnplI3H&j6Q&Hj!aJVM} zo#NQbb3{IM&aq{P8|1JrfKHXq-fyq?mM2a= z2mgB4Z~d(lB2TGq!pg+25NIA3pu_s2gm!d^WPu9{Mf>-A=$KdFr+^D?;*ZyHUfKVk zgTIkS0?xsizQl}MaAW14 zPyC4bzMSOCJY4+u47)Flb+!D{Gtc9E1?A8EX%X?e6g0;*po5>Ah;j5nn`OlLxu}@~ z9sHa|yce)*!%E`h>!3K&&(NvnjPr80bxnL%^kl3x#4EDi%!5wmFO?Vf*2DH#PrN{f zcm{N#``dml+bPRt;&;wtZkO_07WYfw*Ye4?6X#h5GY2~O72_i~oeu0G&c}pi4s_r| z7xB&JHTDu`dS){RI&i`Z&Z0X9h|}C{=0K-7epc;$9N(He{RnZ2=yOfE4~Gu(W4|Xa zOO@J?!Ltv?ckE!V)OAk~w~UVS%k~SMoF9>Q0G?m6GsJu9u#bcec0(KzoL+yQCr)jL zbu4s>Dzg( zh>Ij$eUtbfPbt1EFLcr$c%K~Qol)m5@yT)x5$~l$NE?~>8A~g^>?hF4euDKm>^k^XG~!LKY_=tiI;@K?hEW7Hp@U12Yh3 zc|9`+I&kprC+u{|m4!HUo0vJ!!QM9>^Z4^9Ru1Ca4^SLwZ|J1GF`l4YKYhtfykTw4 z<$@0NgWbT{eI-9}?lmxTphNw(iG0)g^9vEDhVC!2e$c7<{kwj?AlBpEM-(Go*1BeU 
zLB~8l>!4V74QOAAcrB_bo?Lf9hjrJOgQNDejf8cVzqMS%0T&i1OWaLm6jzSF&|yD} z{G5eH{j;`8yu-5}#<@TI<9=VB_{+ZYjOYKVEuN)-+RNn9)?c(1MM5-ZJfUo@gf{F`vW@o1LwfttoG?j zobdb19Ox9sZolXsdl2WXL;TKL`yqc3<;;Ar5Amz7HQNt5jzhVvA>w@MTF`IAyWzlt zPWl7;43uwFi^0UZtoKuL{)Z0!Kz{)zUaw)qslCDM59q+b`~;3`;7H-?^6k0=ggd3+#u$-`sp6@k3gg{R$oLqwIMV;nD{YZsXYl z(zR*CDdcc|4jue}`_SNwoi>X&t2>(g37z7&tz}txJy&-0T;h!EZMGA1x%5B}9^NNL zxjODxK)kE1%{=Jj`eM)_j_ai8yM%brb-s*TUqFZT#g;*XZ}#-Nq2r|XJhgh+R}eR? zzMml5pUhLMr914N1;=_{9)8@jzQ8yJJ5*@!C-FPfHv0!U_!+;!=@NfEaSCMhhGXB) zrrWiNIHP`699duQ_OmXlns{$AxYIV`r*(M04LY=+xKVjMU#7<{;#AfBOs+4WljA4y zZDGHsIrkE8L@BfVpo1TfzXHyv8wZH9wThVo9qKn)tiP{JK0=&54HZY$4?6B2ZYz~| zKR2rT1o7JHyl%;ZPV(|JVEZqKafWyY9P+H7J27Bl@rsLLc;cssyTNW>j-4mo`GMy8 zLMQ!+aS*&{6D|{Pr_TS7^C5KbC+0P94h_3ToR#|AQ*xk#KXZuh%2%m!i#W&wm-dH_ z=NbDxAb0G0#EbvCvX|_q-p-4Q#r@3PXC4y2g8rU^EH8A@A9&vbb}SI>8S#cVocBTp zf5>s}*Ap*^)6e0a19b2Q@}5zifZ=b6ledMry+J4Y!$-0IUfSU!@!E7Y^PrQwO%1t! zP0RL`c$FR2CD1MG^6-UUn^C%d+4sqh?uDKEeVlTuKKGFAMedWUX~)irzq)LUliNJ! ziFk(q_WX5z1mbV&qv|jF33Tu;&M&}u(IN_QChB_*k^`OM_**l@dEJ1FF^F@0w7Gvk z7y7!<9+y27IS%m`Pd4+ti_6;I8;E^8KJi_;{L&8cy@9YIE;xEqV&Y78$h(0Ke!=_% zzgYc~6K9EoU!a3uF#douwnS>;+;jL&40J#8OEr;KI4YbE@s~|B`vtnt`EF~ic(118 zfsDjoJ5BNBxCosb7jeH1_PBN>EAdjzR6OZt=-?mx1}D>koWv>T@I4^t6vv)d7(O5` zaT@7-QMrzmd4;ty#l78TW)IIg8vAUNZ&an9i5p(e$8!9EE_A=RtUNn1Qk=hW%p$~} z=y1;pIxq6nre-1Gae;A9Ya4=*w| zB+hc3=PUgTo#MDHU$LK9RG|rRKI-@I<+uf%T%RNe;PuJOsx64u(;?phIvxl8t!g5U zzq@m5;w94S9obHxL%*x4!GY<28ge~y2giMOlkDw@ze?Y~kapyCgv_^s$m zyr(G@PtF6-A)dm1w&!KEs55}L^Bv0Rw9i9cJo?$D2)`3=RB5w6phG{yIt83K8-@|* zVl6WVI>oWaPkOc=Nt{>#iX+>xckvVZx^r&EvBaOJ-&c`*@7A66@7ZsPJ(2ju^?i5Q zE}gz-pG(YBSx-(Oo{uiC?03+?KNSTh#+YfuY3#7SfDZn_Iud^VyYejJOmdjtp$mOJ zvim=I%(=whulKpqKi>KOTfY?ViM%VjfcUfYJrrBN)H?Lb1*=}qKg7LS*!}aK<;1V6$9LHdphG*r`4Kn+_pK&Q z7l-x)o#MDH#P7jb`1?BIoOOsFLdWB;Js$J9=qBR1TbS+VUAwZscRlCbR^p%1@dar= zd9MuXeboEex}C%+(9b;1K?gsymGfY?J;a$g)XaenoJeW;{kfom2Z&R^AwLm1#j)Ez z!RN!o37GOyWXeCaS=M62VEB8#_&VNji-s%Lf^NM{)7(pgI~aj zIq*Djk~zdDp#uls)c|K#najk9Jjv`&=)ggo9-K>2t`TRo-XF_)L8mz1)(H!D)A#m< z=f6dqf)4hEj_+mK&v8C|y-U35UCix6p5qkPj*L@bc7w&+H+jZ4y!(xMN4Wlw`1!k= z?FSv~hxju%J!U;6&WHA94s?oRpNERndr6!sJZXUAp^CW&t`~?5$exJ*FB;HRd ze(?kGH|jW%Y)8<+p2*jPJ&VrxLY)0N9wa%?frEQY;6&{kcHZ~#(*q9e8ale*RC7k&m!B0&tS@@qjZVD6dOVTq3+S-E zI38~L+AR5sdDa(*Bg1|lmc%A*GMx_~*G14_eNk8Uzir1#Hl2K9wWt4KTm-*)$N0pr z;}A!K4t_@d0XU!YCnipa-Y3iU0-fU6`^V|8$%u1I-@}vj^{)T9tb*bkbyu0x#Ba0M zJWfD|_PAPn*R6aUAL1<4>nYiOpp*03BoRLuaXcgOE_5@O3p)5AxmaK18J(3l$@M-& z`T;s8Kdh*dlQ{Pr?g@M6hi~Vq=XcRO6*hKW;uqEHacMv3I1kL8Kl1fV0piVYxX&o_ zN6@a(j;~BBOq`hxaU`&;xK^`MGS!?`4oC*%{O6Z*Y*)dT$ z;uN0xU*P%=6<_)jI_?i{tFX9-(r`f);`Pz?lRW5{=d!SVh8<&# zs!qJ$^|&JY1$6KS^6kMnSgRIst~-pE&^h_zNbl;^{#*UTXDrXUIhP!#9yVy zV>ymO7y5qP&-xgH#HzEG`j>_M1zJm_)-HbH_{7X)E{@!7Xd@YFERbI3<@dS@E*9_kx&*#zP>*ZDNE zzd{Fp;x{;jgQgJ2>3k15#j*E;Tl=RGr-8%%&AWc!Z`Baz&miq<`Vz8uem)!SE6O!kBP_K+y=Yi{1bL<9diNkf;CV22Rd*t4}vp(&tl>f zZDMZM(7``=Hy@nx{gxBwmTuS5e$f5MejEMy9kcNDRujLdj(5xc3>~i<+!pSQp}dWf zt|ML=y)KtL=$PlS@VyZ58eZH;yaD~pc7zW8z<3Bw$EjP1Gua`I2%X~iS(U~6B-QKh zBu*k7N0j3XbQovSZdfvRd**-N|HV25^tdkT3mrI^FTg2%>?Uz~=sX?CfexIJVxK*A%w6Kl@iI>qabT2ZZLf#KiR928 zpi|{>TV=#|hl&?}N}S7j9+u-2bi9tSpO?1C^^$lobR1E(3vbU$ONjkllGksDKgeMn z0-YRJF|NUmQPzGSo{!FhmVSi}e#QGL;FRq8g*a;+{0g1o*w-Pse-0b+ecUs<_N%m? 
zx9bp$lPKrFE>$mn?-c8Il&eO~#KcRk^ZI4`gHDyp z&&na*A&V0)IdSsod_+0#K!^GG_@PZTBDD|ktot8|dyId4N=4iNeSRPq_qUFTIErtIUx?qwVO)hS^!1AUyt?tt zvczw%1y!O!4=6ER8+;_US5{M`OM z)-B;{6K7~u)n25Zz5O05=0TM6{>}!(pH|4shwevyjxFvL1#E0W{Bi;2cg~=b<3(Qa zPG|YPEr>Vtlll9R(7`{rKY)5axz&m|>%*Aeg@XzwoIB2dff9K6@wT#01 zz6|qw6KDMa#gY9VIyp}we--x2UZg+qM%^`+3p)4#>q~I%pC3q^CqJ3TXXwEBw2|YA z>H7>J&XrVV4s_t85&6{*eTEa~*PqQC=)j3CzMHTwWHfQsWioT1Qylv}tuQsm6DQ_1 zbNiC#Wgs> zoVva~RsV zy63l$cqts>nb5%x$UjE?1~ghqocztq^@9%e+bi-&N;F(aoPY}E`a!3f8}dl(<7DwT zYlv4Vui{C+dN)qm`J;BMC;ppK|BatOjo;3Ww_{J{cJk$3&<-7(R{Ev4^z!@}aFLB=L{hDmw(DC~w z{?@YtNAqXxVp-tL-*=FBB^>g@pbNcU+xz3TRY!^6zOK2wI`v2FBT(MRZB7#Jab?Am zc7zW8zoWaZzhcWn>f*R++Vh1@5Wbq z9`$7Rb>h49zE`#z^M3W)J4J5(ZqvXxw~2RO=j%uwbaLH-`);sf!EpD9*To?}7CQI? zacgjMT`_^m~UJ*Z{J_nI}@Aj?V-hmZ)(1T*U zBYu!R50Upef}Z)9dz&y_V4h|zztuCvv{Jp-$4gXm;ip~CUxI% z^S_UGPI1V;f=+SVRx|NE^j{A~B+eHd-;;KNE_I4MpE`cMP)zac_p$m+j!L}2O;!D5 zJT3J5SoXZ=CDmgR_pj!PE8C0Ai^g}l&gS0Y^Dz1jPh1CiwD3!`m~n|;KB_xrtvCn3(e!RGpUyWhvT0m_-SQVQay*Y|s+J)sMI-t)JT zhHASSn=h!3*EKB-=By0S#3l< z{PRuKiB~e3s<#|>p~E@G4gXl@E{$BN$3^>lj5AKwB5v!{iYv?M^d2MD8SqcDl68p} zC#=~&(8160Jb3o}2E^H&$IO8aoRq5{Kg;sn`oCrC#>BbS#>{~Zobn<*Gv{>+;>7Q4 z=0K-7_WARDg0{rDeOBrFLPy2mlzeJsg`=x`qy*b@yp0teDzZa`_CEnV|iYNO$ zbg&!t6KD^2kMt+biEGjRnE@mRI>m8YaVB$lwx1kCoZq%b{f8slDRgR@L0rJkT3zY| zwTr07h7fPZQpJ;gfbQAfw%_4e7Sb9#N`vKou`KZ-97B<|a|=K4a1`c4w> zavqpAk~n=PDUPf!bgI6VrFi!5xb#~xhIsp;DSyiGPM(|9)L@^KpTZBHKftrTLVOi| z9&~m*@!NDUw-@NpUM7ikxGV8w;*>sQ=0Jz~VxB~Ka&!(RPKgAnzsmYT7yA0gW#M~Q z;HUe0I`Q+?G>-?+!OsyIbA0@_fwPIzYq;V_KSQ@EXi&tt0h2xVn`(-6+0T*Z5igp< zxd3!nmsQk$ejcU9(j{5Pc>Ij<74@z%aRKow>^0XLI`|puD{ww#SVEkB56m3sU_abP z183Q|6~rkPMXjTy{h(9T#JJGX+TqGi@uEFH|4F*)Uz?ux3vY@g8msD1wA%lpS)mv18eIfuA7bg*xB z@xJ2ol-r4uQGb71+84T=zB!MMT8{S>?fbmQyLJ(;$U1YqHw*UC@X$Eyku-3D0 zK>L7QH`dro{C-EgX;=2|Z1#O{fH+Z}m^s|e{jKIAzd7cxBg9D-&AiWpE_6TJ<19t9 zogn_a^k%+yaTfbNV%Pc8#Lre#@#Xw2_YoMM;g@*p&J(AH+suIu;|BJ3;EY*xnK&8r z`G71Bbl|iV^UVGx*N8Ktpt(HIDULmFwoJTR#7S7jY=7_aJ^iiLqW$c@cbE7%YMS}b zh2DPL)=m*e8$atI@pIQueA&OD!!QWDV&4b<9Gm@=c>Vp&c7+a{JK|jGQr(xtNmt&? 
[base85-encoded binary patch data omitted; not human-readable]
zd__JK^*(iaBk}GX)0Y=I_w%vy-bSk?TZvykx;~#F?`p9Tb9G5j-eD@&X?)}7RKFQ`tKR~C3q2P1ab;NnEwPg+y@6SE@azTgs zMPAMC9%TFDIB^y~&~uM9p*)ss_5Blee-lp(kFO*&|UQZ zq9xA}|IhIHehVGyi++r9J}h#HI8$!x>kA$H8Ep=ak3HjDBToP0dVfO4=S7|4K;P9j ziC6xp&6D=?Y#ea*tEOe{694o=eZ6JB$|=@YPaZuW&ZCfeKR^dRAkK#$`mcLJoa%;m zJD>yS+%9gvhw8i_&c!79@<0cFzB$NoZMU#*h!aQM^OF4&Ivyu|t;OO!k+vJ&6R)(7 zT`n1SK!^OuqT#I))!mXm`21sL@$OuYpijizR@3InazZD^5wvUgAxEum#H&|MZ$IeZ zhaKV`P5UTT+h6mFL)5);IUhm?KV%a70m}}AB2MaN`uah~ag1{xso0`$#H*&xDa(H4 z*?i@)Y6^c`X%dn6S6b=I3!U`GRB6X#Uc^h`ulEOZ@CWk8@T+gk7{p1U&QnN#KxcFO zte4^)z=~&M6X&d9UlTg_^SyK2%Re$c@v9j2H9Z^mocWP|W+Wzl6csnh{$k3Hz&@~R z{kqABH@2?cKhVKHQ^otY*<+<5&LOpbFZ}}@{PSA8f9rK7Epf&e&KW@G-akGT&UK@{ zH+TL?{I0$9^@R@QTv?shVTbx18}=8VgZ*Fkkswg&OqsC_fpem#4y(Z^a7p8Mb1cD0GW&am!<4)%jRVZV;O>Jul2 zI%oDfKj27V$`h;zKM&5?HVY<_ZC>*w?LnsU8rN&Niv z^?rsVZt(iRH^VsMWK!!4 z8HYoM`G)=Id`I(gv_RrMo~qA-$oB}!D{!B)p9WUBy+3%pi}42iBShmV#7{HU=F5Ht z9sG+t1UQdZPA5*D(|Qhc;3N|JVSBpHCQilJ`t>7pHpkCue~Qmnl*%xVIR5eU>s;ve zgkJvNc@KW+8=?8DR4=@O;{ZQzK7UXka3S#zq}KDHa~}`6EWgrR{(Rw<6901@n=j`z z=r;M-<1p+h$J3sJRuHea;k%H~frI@w__=kyHN=UzMeiTzY>vx1Y~`ePk+K|LPn@#q z9U56b==`4BdiWc0I?6Sz!Diysc%=6SbngA(Ykd&w>=WO&5kF2?dz_H{8#=V3de-aP z^<&|_g`b6c87Oz(FuRF+$=l}2c?CMelX!3BZkth;*ZesnIG#ja8vJ*w_YuEiIQ{qr z9sKSu;-aVh4iP7|A+G}+{B$}A_nU!h|!=iX0#R)29m_Ve?T#9wLY?FpTG z{*QQA-0v+u_AK!?hOqgv-9pFXyN`u)Q}9pDY!``_G@jl+(7`|O7dTg^-6qbq%z6%V zHs{BCWmWiIRnngKiSz86J&w!zL6@dlvO}pJ?Ar-{Bfo)iJ^ucPc-#N7dD7p|{mvhi zGbW&TpyONMrI2^yG3Hge6b8_p)H|XFGoEJg8)BC?A&hb`y z4s_taKj18V_ZM;I&d_t91E;x&a|-wROq_v-^c?7Hj>|eG-i11w`)}d|KDRlteL!co z|DXHvc$W_Kt8>w6_iH{cmU?$n@}O&5ykhuAd3OcpEsIp-c{AZ(p@}z7jW3c1o$Q}@ zcLL?hGCn-P4J>hItM;_x;J4 zCo3{E2JxS(yu0kLp3S4qbIoZp#36n<6(>u3%5%+V->_$6uLQ)YtMcfQ10DQ=?~#BL zdS_zdH*O)ES5pR#G z=fCr-%fh<{u;aM+8Ht}hxxT#6$$p1;5xiRmvk-4jHoaeHIf&D$q@DvE z`W?=hfb+FN9^x!j-&>LPgU)@wbLPd?g(*P%!BzD3)91yU{jAg8!o<7mr?0o{XZzOi zc=~*5apF8u`y{fTK?grzUW5Je4fY{Uv&Q=JK!<*Y`hpWws2p+jR{y`*sp^~L6hD-V z=OW&!hWc_rhjNvB#&N^Kb5)3wLYf{Ud*ZdPW#kgLt!A=*tBi?2dC*;CwGt zn>bqxc^BwVzl!2K-t$!TiL=L$cY#ihM>to7ay59=ka*(_c}VEo_b2Cm+=qotiGN$= zA!UE{EbjEPwu@s_>Q$J5ZkKiJ;}$CZ2{alCr#^JdV2gE$?W6fGwc zC$b@r2Oa#3{sqqBr_+d&_mY0S2A%tU>9Tgs{0ANUgYg#r3HR+ZamuQDoYK$GfrEWLb=wv^^xQlWPu6CVxP4e12=~w7}*KV+n1b)r1w~1fR@O@6`5a*Oo z18uq#Y3k=0vMM+~9Vqh36Cd9r{x!osC3Nr){DpE3nEQx0sjKPz106W$#rHBU)qF;r zjfQm-bT-G?-~6M$BF-Vh`*fc5XJ>!w{P8XEGpYS?Io_J$8SLl5eh)W&AYMduK1%iz z=wQFr!k^csd?AjP;av{sY|fAOYsI;f8a2KX=XN`NKY@<>iI0^`}HL zPR@5Fjtra>?(EO`4tX#5H^@IS@uL{-4?_q4;=U<3$uh+uPE>V&SlSUfa4Ly)cbWI` ziLZNa+HZYd=1=0ZS8<9QFQAj>CeSZXt`e6s z6ECIVU3KW}a{c(Oka)j!V5{uJiKFfX$o>Q!`cwXSJJytJ+bMV-0C`1}?@p@R#67OQ z2Px|dom>~>7vox`z4?fT_(`^V=-^L`mnc`JgN2B5*03K7oy~FHXN%Ud7;(}rR6k!s2YX_Cf<4FgsX&|_x%C|A zz`=Qca7JXTOq`85^&IGIj?4Ni&g<>DUX3^r)p>l`?x5p&%g^!=`#oL9)Fj^Bns&LQ zAE0yZ2bZ<~6|b`zm#Rzr`YO*T%L|?CC)gK(9gl`=K)j3rdOJb~f8-VMfA&v}iQ{cJ zM+hA_h`&*uQCpf5C%TFwWWR;Z<~aM?2mjW@xvT0W{orYwjqiw}oTsw2Cw@gkz7o3M z`MIY!w~+gqKk?hCe5Ld=bkfgd#dmB5hwe^1d{;@@6*~9_`!%R{`wP8@vud|~euEAi z#24V??KYA)Z44afz(HOgoKsQ95~o!oeLfC4v=_wj;Kc1dfjEVF>GNpNx%cOf@3y?< zej8@*B;uD`tuH5Zb~#;E0&%b8cC2Z{nf6hikAqIO3yiC%_p@#@iTBoUegZo90qZhw zmP8LC&Xo@Ob^#sya7@f&=_kx5&fQV^c@jGJesJ!q-j27J_&qo2`JV2V<6a!>kbK!P z;)l4Yw*z#rC-!r|S+Z&sagrIn*9{#w@;hZW^Q|LJmy!DNKnISso7Yjf_HHCjw^e!$ zbT-Fj$Yxf186UBG= zVE6qi_Ym(}O#S=~9r7pTRY$8-b9vZ=$94thF|iK-ev0Y`h+ijxe!hhc{sdydB0VlylV8r^IPwc((yM z*bVI&oH=)25@(BHzZ5!~^P`=LajC$9x5UYy?C)r&O2^}pGya_1@B{HuDmzI(cp87g zU#Pd`{e}2Z)jM^P51o5IJKvQ(6aG8#KPkIPzI@lKq8bNJ%-dhHQ@KsXaq#oe5FLKa z?|nDq9pyNP^#km1b66PSR8!|1WLyg!+R>a;e2z9pnLmir)o}j{I`luh7mD)a${d+E 
z_tbl3(r(bX_lwJlDb7ot{VOW*{~TiXKiN*9Q|_=(Z|uLIyaP7JB;E}}eh@lv;1_Ui z^@vNHFNXK1po9Hz{tBE_0SSq7dYXRxg6?$J=M zDTwz)-Pe`=fKK`Y;~VPVD03R(eKXvbhYtS0eK>G7q{={?c7}Vz(3$+v;C3eB#8>Cs zWPA3^AI^8AvP{ZG{9EcA*1zMstjFTrwdWOb5r1oIy}zN8{z1EkUHfLpN4$83eF^B` zAAb?QrF&M8I8)U9Y&mX0XLDRuG%;`OSyYrbN7OtZ+aGjx{eH&Nc#jSB8{DiU@sb(( z2XvAbBOITH$bH5|yo-kWzR;aWaBF3eU+!_?T^N+_O<+~x)vsd5`?9@2hxi=t$pj6k zIX-*CWx@NKSwx;BYhGXChp4Gv=RgPlBL9nW&Rt)dIEUNnInbg1VZH`uM7H|GX*XS8 zU+CPAht7S^x;q*YKdw4gA?@iI-x;qLNz;P(o7MX@vYm6h{^R{+@qM?rUTuhzE~0G* z+0LPp{TSzkVb_{rI}opprS}hX@H5r};G{j+nK;>A>GL|!p&#RWk>Jcu*OfTiKj`y1 z(8+UF{l&XAH^=oL-m;8(dqKzj*cs1W3)6>qg`4Qxp=a@|%Nii^TIEOdCw`YeeR-ji z{>Aw&*mHID!NhB0_})Eq@Gs_7a7sTNMx2Pd^z8&X_&1_BN0P7aDB_e*c_G<;p_6gP z81bFWobksIFaFx5mJZZWo7bx}v`Zt?GyjQ0DgwnzO=qIrM zrr0xxbEUXlE;)Wc2M+cJzsKY$K)$9Yw7+KxIvoXYAPlC&3eHpdw^lr49J zIFk+gU!KJcF6)6fcYHt63F7ZD?0-SW^P%&8&92j@i5JeWK9%=tu-0n(C-!!{O`Pn8xF0%> z7ksS@;+%*p=Y8T$RO>EzP6Rrf6DfT0$dg6AvIpnqkzYc6v%Gsm+yb3!yUFnqI>i6g zR0lh{e@3_6$9~@9z&Hi|jn&VH-&Eb3ko^NX_yzMnIAc4!B2Gn>r;!}!Y>qQO|0>5j z;tWyWE0Fc|EI;q$FaP|3_$vbS_VkSJdEqy{PU@6$d13}9}gJ5DhB&w7TAK>YjD_5Sg6ob4;_ zV;nvnnfS4lJ*9u3lk*h%0qpTTFdFgdEYSN0I`{|v0_XIpSj0)WM9+Z^{Rit0a0X3_ zN1P@q&n){7bngA*{LXWo>4}JcLhWNn`%7gIV*hmUXGObFgb!9E7~TaM-Wu5Rhu|%rrP-e>0jtV*q`6Fyi<1j<$Jo${PUPS^@!WM zo<0v@%0J_~11M*$0u6~*@W%tf|M^e07wC9>?XsFy=5~F0XA|NjQ}3}z9(1z(`HQ%{ zUzrxfYjnUapY$tq@GJ66sAv9Lt%;N8iJk);+A-#NaE|P3PaNOycK?#|8+7jHH|IL< zOXJSOk00WH;rm!6#JS?i;ky$5T5SFJ1s&{(cj;h<_iK6(r&2~e2Rd-D9|=xS{XWF$ zQdG}@&gQr*d?x{%HZcbfXH-?2BikKxcDuvAj5Ghb;nrZ{y;ScWNI!cP$M{*;e@1!x zZyQd0NMrAyL=L*hQXuf|#Dz5b8O#uK-H z4%^PM-11(3UDeUQM4T1SA^-5;aUAhJ>{7YrB;sdQ@5IRd4xQZ|e62xZevMFY8gb&f z>~hNbLx=jWZ_;YMSDtOb^@o2^zV7d45;tQ>ef^Co~XvP(qzdtVsB7P=AJBE(G zAMSiVFIL?J#M|9SUtalsUIyi_ew8}b{yO4xu)mV4{4wltIQA0ae^&Rsq`#oUIDmId zz{&7wIdKjc&L=@R5d{ zgwB1x@v(x$z0_p24-r4#6n%Z6v&-o`Uy`-dG2-l=Wpm^_Vme=f^%&~C+v^nZk{I$H z(7_M*b1>)JS>j|+@w4eI!)oNWWIndc0XWZ9#meuLkJm|+6`f<>+xX;gOBJRV*9~+wZ z%jW9&(EVM4Y9D@eDd}0>%39bc(3N>1r6y zpo3pzzbP6$CUIt{bFH!+Lg(IY&bT4qY#ib@QRmP89p8DLVB?qs#6M(s@4$4P;JAqE zj#fxQyp!rYvh*`_@J~~5zN=}@`owYXTzxTUeud#n`tKZY=cenaot$ydI-?8equljATe*M(1OZ^sBznRsqxB88$ zel7L;dbRrZ)$eKbyIcLPR=;!9?^yNQSN*nEzkcf1rG5*m-^}XQTm42=zn1!ay-NN2 z>i4wz-K~CCtKT{5cdYvDtA5+7UqAKhQon`OZ)WxDt$w4bUrYVIUa9_l^?O?V_HEGr zzj>}VD({8%^KAm>lUnXcM}9A5I42IB&GEGwihT3DK^cj&L$yP>9)u3-!6KVW-6}V= zm&!Lg_uI~v$x7TLDvvGW7Snzk<_nCAiK6Bt-am%@Zs=e)+}i}_(}g_5Ic3=IhR)_V z&&$NwR*X0`)&7zkw> ~GLvdVu|~4~lvZ zdC{17Mbx`&a(;pi9ISW2`7o+Eajrzu&vVd$lXUG1+HX0Qy)|*>rqgqvvpLTEWvYAa zh*PbwJ`VKMpNQvB-z$Ngh#$7HzMRmZzN18(;a9E;ar!mTbD#qU@428nc}w*m&Zo9| z4s_sz7w;eq3)6=<-D>GM(AgZ9)m@zLnYW`qan7l{n{21h+3ghhXkRP;an9#P={}fv zz0|r+&R@`B{<_m*(5smxiUiMJ=|z8Pn{POA9~z3 zi-A8=UmHi9&8l3oUqS~zBoyC=jWjKgI4@P)DE$B(pKEjWtGoTD5-+ye2mg2Dq;sEi zQK^~44`Wz|$bHh3$}tb;ukwD|ze(_V7~egIUH60!B7Q=(-j;TS4*tPD4mk0T%_mN5 z!@3$eo8ycx{KqUN&Lp)Cl=k#AzQDQ`*m0trzRZMd;v{ zr{X)XqnDf{&JdNKmVSZGz1^Jgc(jgZiT_dAP4YdB$D51uZ?_v>B)<1T+YWL(hEC2m zn7`qlF&VECFRdXj2Oa!_xEh?3t#1-%p<(|3I&g}J_#q(YUE&mMs`oQ=Hs{B>Qrt5f z>h+K~chx?Vv=ek3Ke((Y4LHxd{`eE(bvK-IgKo~tRUda}+Z`M?Oc!xOjv+6I*VXX; zBy`e`$hX4&R*5&nyKk89p@aPgi1BZ9hQElj-H^9}&gMAtv@>3PB2I&DwjX6b@H9`0 zydmmadet}Lr#8&<(7Er&E~|};^CrysNT#MhxOlG3*OM2YaR!`PG4q;}BM`#9OPSJ&z@-#5XTf3LI)1^3BhUl zG%<0es`yy?7drSC?HruvE0Pf>i)ts*{?NJiuaAZAxPpJNQ!3)Wn5G}6p+kKUH-S?n zUpnG+o}=eL2ab&Y-sa0l9B)IM0UbEFFN^Z*9+s6jVGVIPbg)0#F*s#Mi=8+ zS}ook%U>if@t@__^P&5le{r7~<==6r0P%|&-W!4r_QX9ba6(rvLY&Tq^BB;9gLMx$ z%chhdP8`F1DCoe!cMHIYpS3h`-l_K+WdDK=9KT7&>6}roUFC>VUG3va4sRBA`vR;dO=xd5sCH{BAxd7%BQ$?a)Fw_gx^jtt-HggvL`ZAtu6$!)%DkI=!MBSrkzJwaRI 
zM9-?{KxeY&#w#6&vpKE4zMk3>?H~268|Y8`=GpXo=-m73$M<;4a=bsYaW~@cDQNR$ zz7RUtxqg}Wck1uG5}ZH6x(Vgpov{~j`xesM7drUqqljDLr|w6bE=BYl=-?;J_bAV! zSc8ZY|4+Rgp>uCX=lE9e(@^4%O{C|0<|iL3&mZik4p&DKzgJAX9iW3fu?~eDl8qZn zoFWnR9O%ILD*Tea#6;qxji={82fr*4-!YC6cM5UlrqSCCI`@9@vF1+T?<9pdH=X$H zQ|rqK9qNmC4t8rWdNy%VseXw5VBuVW$}fR~_z9e*W#smswD!zO;v^`k_oHXyp!2-zwux(rf7)O-xsP;X zCC^VyYiuM=8dXkdf9PO0e0K%{qu3q5#lu~rMD||@DJi)*fn{} z6U1qeU(bOK{<$sksOM{)Ax=gWC&+jPy5IQ+<3Gw-Anpa?uSlx*4|MMNKhDbv|NM34 zGV%XOZ}X*ppp*VVe2enF7Z{`*&I3=eq&niBsHgZyY-K zrL5?G3lc>lPR&Yszd#4h9C7cr*CH?C^fa8OfDZnJ9Z|31#bOXAoMC?#I`{T>SyoGq zpF0`a zl}DEC13LHvaXL6RyCx&fRCTUfa-c)~`ii)@MD>)*{c);$?Q?Jo{VE zfzIYQwfVBYKqvi! z`3n9S9Jx60a;UgT`Ug7r2Y&|V(AQGL>7KwYmwS$%wc3y0)6a9FEOGKj(%TO@_wC#1 z|E2vZ690f&xBol;JLAODxvCJ~Qg)F3F~x~-#r!fnUJc^aQF$?GSLom$?B~Hhud>!A zP7`mvf1m>=w1`8FSFTT-{onNc5IURVvH}8meKK)zBjOahs2^vbv)eJw>-kt2D)W7X zuf3ZQufE|NH*_c$-cyC0hL>nX96xowLHZ9m*}t$30`E$McElTMxGw`8?2d5^ocpIc z5~sT1d<}G{-+S>sc3fv^wPq6M&1Bo3(htzNpLcw$;^MoY zDT)OV|4A!-dx6d_=Z|-U#r$9PpZUc3rtZN?e?lk6C)~?}{c3MsOuRuY_2q)ja%=5K2i+fEW zPH!UKbyYsO&jTIKg|Cm-HB0n`J=FOwKMU`-!w<<rG(aG@J2dVRAT;hS!Gc3@clV%;^YPwY^T1rYc5+7d`1;QsXWYz(sj(PZBq5@zuM)mjw0@R=yitpk!<1_=-@9Im-kJ3fjGBq?xjHof7KG> zW6*=k#Q95|myz{_EM`S;O4xBq;J-aR6W8%!Pi9?_R2lE5$mMhM4;`pocy|O&e!N0I0IEQ1r zB2K7kC;!g=PGhf_w{D((OZ+A(FC)ic=u|gRc_VM*Pj8On67K#;yxr>Dl`JoG=zlF0 zxOVl2!&YwC@9KZZkHa2*tG*KdP+I*s2Oa$5EBu`EafF_~=0oGf*K?o)XM?!Umtt8I z;*`Iozt0C9I7!7m<%0?_i1YrDo&%l9aT+O)v;E)Qj7^+vkf0J zr@-E6hsGz~;B>}c;VhU z)nWW5;`_ya79hUUW?uq2_@R;*Cr9NfLY%&8T`Bt&bkYyV-@txB_lpxRg?gu4&JWPR z579(C60oo|akAOGj|&|*7>`h%Nmt4f=eoKdD9ZyK{3++FC37kh=asq-EBy&wc<}=6 zi=mut>r^9t{l@zC3mxi<_6*La+%<_)pq-ur9XJ^_^LRVzk&`%^Z1N4z+1br=VLjqR zSNlx=)^5IHAFD|-FXFdU{Y?57InhkM81IOet`PH}ZUS=J9ao(~QiD=TpRv-+_=i1$ss zJDm^%ggRFRU3&FiOMLae|G(dFSzCRo`TmG%Xs}DcW<0N59N(Gh5#Xht*PsiEb*)T0 z*Z0(%##OPuIKEPM;+1Qm=RpT^!T*Ri;I}GYdlE0Z&Au^o;2^&XPV23Gh%-dpcarT9 zI+J6)&+F{npEwKE{VUm@-P_N18WHPe^DXQB^IhdnO$HOcf_leAwo~Yw7yAAZ=Q)kB zwRjxMm3}z!)~j|ZdC*CJAU^>+F1S64cnNLxd!d6ra84YY(13Boxvkdo(jU;lADCaj zDbQ{bamw4|!=Vd%UUC`}x9_8S1WofyCH_LSPLuY7j=zg#{ccRH_cMqW$>tu6oaeA! 
zLA@{DpF0cZaIV?!Wxs$fY(H4@_B&fH zBmO3}ewTdr{A|51H8jUc;%BwFw}kf$yTCUj2Md66b;Pr?eAvrk$`({_$NU@s4)soM(u4 zNcmOf*P%mx{p+L$e^n||TIJWhj109ouIQHJ0&!2TG~0{hLf1q&wCedLb85Z%`JL~n z!XG(rT_$csoBN2+!5?@Z%4NsE>%{rINZ&7@gFhyZNlNqej?TA$^2gJ0ABfX$pIIK+?x4#y#2lBPS;i@NBY6t{`NwAH}~bBg2XTJHTwU`Dg6N5|Nc({gm6KBq6 zJqJ2)Ft319vzUW8HFoR!7j!1)$9>|C{C@TLgr3A{(n81`|*`&4T>*CXD?b^37?I=N3ZIC`#< z12_HLr@}ob*yHP-hQxnw^PO+#;BU-Z;1tT#ggDzO>E})8!09ITsWx73PMoL-^&IHX zZgBn?c* z=->~zUS3{o2yv#y)yMPD!5 zQHzq-eziUBEZ4ah*d6wXnqdd=Pj1lnL+Icy>@S0}eeZ7KEKv8NWIu!s?J<1=KHt3Q z_kF|}cit?IY>&{%`6Noe%(PyA7%P}~p)d42=$L1{uh?Vz5#l|ItiN|5?<-;-9rjAp z=>&1QB-3-CgWac!eAVex=ZUk?<{byiGT{#Ko2}HxA<%K2%({FWiZ$#ELUJAv~##(UyCR`2-B zehMAWZ`Qm+vi={5_qwOPeYlr*a2kngb3W{4y064<9H?&}(8+lY?_Qw(Q_dK@f6dpu z->tVJbnpkxHGy+@N@U`kQ~6TaKA)_Evh%NWFYsNVIL{nx$z^&`Ks^)1Euy-LO* z{<7}+`a*|xgYQD1z6p25Cr+f!dOtwNesCJGMEsJlbz$Gji)6#ezW<7C8( zKij6BN(aAUJ&t^`V~6ZE0&nQQbkQi9RG(l^;0^Yzr2j~6<(7cqb%|u z-nwY!I4|Q!=<+2n&v_UF$ERx>?AVR=*RloouML&55I03e{rmwP@*?IaW*AYoe_2~* z-#J%)5Bp}YXR?>si67l&-2ol^f^{}HC6DAL&Uu@42Xv?}*2myX?3bT7ajNU<3tiZ8 zrPGKn=CjH93KM^x&AA!qWWJ=QSXV!JSd4hD+M4B+<27`$pJWo}2X8DYMZ9RfCQr6+ z=-?0BpMxEHc$XtiG4+nGBmyu1}GkW>JlXyGSITG3LphMm}%8pTcuiOoC^$X;mV86zzY7_Uljs2j5 zA26SQGj?oU;_Mu$_a}7lL&~I_w@O#5A#p|w(%TKX(a$_?4q98xb&pEsOB%#&LcG*A z_kN%Yub<_d&Fkv5A^N|W5Aoir^BHm;h7RN4 zZ>j+;+C1r9wRufk>ow$O!B6hrhWOLfc_G z`iJB}H~-p9SYlYYrEihpZ&3M5W+H9tu|LI*#lZ_VekN90~XoZB|>26WA*B`tAv z-Er4Af%_1s-@+HGiPyyDeQW5pD8= z$GyZ`WpmEd-F0+wv48U7QZVuJ+1L*{v}4!-_8a*AC~*eZ+>3(_9Nd!w=ZgPH;&>+1 z_bceo?)r*x={xlDRh<_k~p6}lGmmCK6^HKiGm6H&^t%tt7LkD|e zodb@~_vFOsnNQDw4jjx!;C#BDnmF%l-Zh5K&M$M9q$f`Q{Cc~&>z7mF9g{vjnTQ{t z-nEr}hc0ZtTff&F(<2-48>n}!Wjv(MuR9Fv^TMviy`03GUQ6#E=-?mRe+B2ABOh^k zH_&sS0|(zv0cTF>Lc|%S+O_P*(814W=ioT=6(ddooAC>}@ceL5=37Hc5&G4FU_XpQ;7sgRkvQAm={eAu9P2sI-X*FM=Wz_vpR%3l&w)CO z%pz~l=|y$oy@;!CAJCy(INyi*oero)93!p1T+m6oqy2$bziA!f9m=II7j&?@TsO7- ztpRZ^7T0s2Gdb3GkjlhwOq}#;zh90Y?!ANLG;odr^=)^j8Sz)E@mTs5I^J(`8VMqE zKA~t7AL3oftM>BJkayqn~$8vFp=*$P}Gtv2c2xs>DQm9`?~wr4kg~+ zl=}7zoy^}|c**_a`t*^+D}UeguPh&QY=7%{vBeF>67TqBeS4An4|qrEW&RD#BOU*F zK7FhB4n>O;6N#VsuF04E4?6U}@#1{*k(ZN+)8mPr1D##}^V~L_IS=*q{kQ$^L*LoN z&-YZ%cd!3>8|y{BE@zJU#P@rx=R=2eX&u!Ng2paN@n_&^*SgeG?9<-6w21gwqv-n) zbnqAY75d}oF-wUv!sfghbnq9}f8flku$(x1ZRRiN!X7W3hHrD;w=I=vHSy;c*7r~7 zxIa0K4dQ%Bho|d^cRf{z$&;0F9R)^6Vpie4F>@q2v6l_1;IFiwBAKZHrl6=?}a3 z4(mVE{~y02#2co{C;b5(`bRTyZfLFJ1aaD{d0KLy1E-pJkGn?v)5Q5*jc1Yroyl<; z38!)XB=@Ux#7Xl^Zzt%?c8h$e^_|od^DhxENR9jQ{DOQZHA}Q>ZB`td>)PMPI0pOA zu6~WU-PF29#9Zc$4@uR31#)6FS&)u()TLX4PHd zL{{IilkE;Vwx`p;yK*R3$4L)~x7cP}fsT1zM(aa-jv>4MQ{r`2`55UR=y1MdYNque z!?stq^-r=oFNqtf@{!^B2lwrteeQPmMcSRN{Uqd>VTbwA-w;28`YxvQ4|M1^xTgrt z`rYq|lgH*geCXik#-iUuANYwlWAfQ@(a*`gZpRTTr3fjIN4Ml zU-}a|lVjb7i})}Oak8pBt891f#R(20;cvWuyKZ^{;=hk$+E11fI<%K0B0e})GYN6( zr_=iZI_U?@E3n_Ys40lI&gMH+(3$1(GJX^LMwDu+QgaA zVP5f64qCi(T3L?>OI_pERB;}^ed1ii54QPEE_7x&f4q|_;`2zS^Acye&G#FjLw#Xi z)OYv9g2d^pzI*xa{NinRr{{CS{ihTq{&e*{Ov#5XY`pTFw@bdfS6xZ@ zt834Yh`06yx%_oV+ygDsuq^Rw+I-IjI`|9g8Q8gNyo$uBq}sn6H=u*Rj*0iT`y8o4 z9DkedCqfsVJ!N}XFuXePcdB+J?Fn6Y{McgMx~obp;yY~G8+7Pj@Yme|rBV-0^)oI& zTnszU%vXo_WmJ2Uc7_iALcR{1w{PncCt79wcmf?b)x>+SIW{*U&P-1|2Rf7EH2VAG zqw`-Udp9G_tGXsf#*xr*KFZrzE&PzROiSVoYGU%FpP_>vs;Pd{{o8~A6-x%Y;%C?a z_L~#AHSrg=)Ylt2*bnhEIERk4BTn45dJc3Z$GWc>Fr_1LvUSwg*S&ok>wS6uAzg^y zsEeNOZaxX~3+!;Bb`Ro@_SW;EgTI^z9J)1q=eKLV%U?L>3;xw6y@|iT$K=a?1D$DS zFQbQuAHLS|Cr-agCP&5(&>?miTk(BxaaP;g!m8j4|zO<_(@CZ z?F(Ia;~v(X;NSRr1o870(et5${}5MXOCK?B<|&-x0RPK~F~rYk^L=>e(BI%s zaPC!|K%6f{-DwBb{fF;SCKKmtIemTI^%wpQSfgAV;I)+Wx^L`%GgI5*WeEBhOC 
z=x>p-m&%m(!bjV9Jt*Z;;x<;}uB^Yi_Ql_!{(GA&C;p}Xw@eY1C6YmId?%M1#LT7TEMxyxq?r7wF$BFaN=KGt_$@<~@ z%qW+mcL?$PY`(t<9ool(o^1Eob&NVjHx0|T{gib z;{I*3zJ?C=PbxSiJ+Bd`p~`DXKSBpi^94Na4J>ktIHzp(d7*l^IY)y6vT;ObKV#_ljAhXB+W|sf{D%25GStU$bJOf zg4u2Ur-%Rl6nVT<(K8Tlpc*Ho|Dg-dKgjRE|EsrTB7PoqE>QBJgMX4J|7?uY-~V!_ z!!G~eygc|z{>VoBOg4Eu=-?mZiNML8F&A;{=0WIS&)@RYB zgI!V2YnjUu$5ZXQNe*=I2iEc6#JE#|I62fgOX&~j!uE%CUuyG$D#S0X@*@9^?=;ei z@358esZRWNHTC|1&T)so!-jKLut%ikwTPF^=G--O=r38t`pKhs9pXe(c>w7j=)j35 z@~9z68xW_B+Lw|X=pNdNCO0GgbDR4F^8PX2r-uCt)on?f ztX1{vSLk3joWBOgKXGf~v~}t^(7|q)$HD1(sU2}PdFky2U3mSYwHOaCJ?cpO+s*WR z=)&e(-je_ex;T94ChmKbB3ufe!YR`OCuF zM-b<*4F@`Ku-->`4*HED&NQ2O1v-=CG%BynL~&U9niGh#MdedvyMs=i`}OF=?`Y>b zJDGT8RUTRLpyPM6y^IgReEuh;F@tz3)w)uykD$Z)C~y3}F^hbwqSmX{{iW;AXA?K` zHhsJ;^QT@aa5!1ye&-JVc$I~JkOzf-icFkO{IbjS@iuheU_A@Y);s~kNw;6mfzISu z`^SnXfyDWILtkI_`iHmiA}#Nu`>tJ1{JmH8eCWdK-&mKz4o#-5CVuizJs-?^2) zihgOgrf~gzuK2sZSRaK}T2K6Ps-4O45<2(`?-HV%exaL)Gvpt=zo0WY)^{IkE!ak! zK%4XZ`gb3#_XPs$?IK<;n{$44_XW~M;ym7jID3f~Vsp+9IvH={{0QuJ?8pJ)t+u;I zq~bT&AM+PD&qp36&PEl-NZIwhh;S%5w8$l^ZyJ z{36*?;;gV)e?kX8U_A;ybUycjI5*UKLbeO&z`=Le!3mx8nmD^`&T~Nre_onrJ_7Ny%P{;&MEyno6vDQ=50(D_q=!C zPE5S9aZH}t zPty|T{suhFR&K%FMpW`s_eiL{Z3S=Q(rf2$cL5KRC6W>FL zmL&&qwnf*sH|R``w-Mz5#|0w~<|fYlf6Vg8_5oe|P5d3PXH(CwsXOwOD^A0CL)5p^ zm;%JFoIqbr=ultm`+@T;O)28sU#qtpbnxdc@qSN7&x*v!G|hbZRgP!Su|KWv*iGA8 zg?K(k_1_J0Z=d1EI||~vCSH%K5r6(0{dbO_lX=&*BJbMkSq3eMfe^@!t{QSV>qOpcc^`V7y*6Q(sH&gZ9QdF1*NI;=lE z1}!~&X&t`DXgz<_Cw~j#ZYpnbWj}E@AA-L_{hL&4Mf^(J_3a3{u*YeKu`3Dp^Qjfu z6aQ^plP~=Q9sJ}Y=3TEoorsg*gZ}$n(1C+{0;uo(rd^5C_N<-*9XRkmI9=lU5@+;r zJqJ3IV?A#fu(&sI-d-{}(!cKIBb~-PaV}u(mA=G}{>tRbaR)j%?jXKJy>HI%PrOgX z^z8+@u-l8%$R3s3%Z#3bh+p`)e%ygBym8|1>BET^r;&T(1maxOdv4Ov#QFSG-;SX} zyFnfRoWHA&BTn@M`tm@Bc7y&0PWxUHi8Idozl{rcpBDT_n*-M;E2leHGPL9{O?*RL!$`(w#9X9VZLT8rC z%fPu{aB`kKLYz>4vs`j~fezzK_sm>}Sx)bxNSfJNeAH zOS~59T{Ovqj`L1VBcJFWOKUwOo~L>jQ}UqW_F_H1dLhzN;_XxSZKeHrJ?bzJ&%%$X zcfKIbJez$%=-|ikcfQkm(kXhsCe9g~{bT4%j&=FD6RUL;;`FJf&tF3q-n^YdyjQm+dko^gXreDCbSNk8b)lSba>OBykGiiV z{Q@20wF;`E?8~?I{E*v+T;mny1@PyjNI?9rDvvDt3v}>HbrD~@bR;289Gi1H(4oGt zFUk{8HwAIh#{O@9nJ)U{`sithUs2sxkoAQQ<%E4v&Qf>M6DQ_3{X7C4{E|!gWozax z88b~eP!{&YI0Amx1(}HN_fbFoLkAAl)8KS>W+P64NcwgNoyl<;6~(=guaR>R=bU=y zLyptX$$13l6i}|M_wo`iSthewk_R2<)2;hO)8-Z?UY6AQddmGGtRqpb6tjyHXI>Y* z-JyfsajpuS0l7*Or*dOG2Rd-diSux&x0NGK85O6=_6i+1UV;-`qY`nvY`#wc9XM^p z`}CV0S0&DPb$(lx2Rd+YJ_GhSb;y%A+tfJ<$$<_Wd@l-|F5PMq=Y)D+QF5RIC$$)_ zd)KQ=oJKZr8+0be%Q%vqmYUl>~nR)vXZMt>t`iN>@U#CI4Ql@DGIq}x2b0E@g(7|pPCt&|-1$~H<+s*L>@9={&;C^f3 z80sFMtRHk{{eJG}e3{Jo=8LC05O1hD$1LjyomoF`V?~d{MRIjF43w+wh#thLx?Vrd zLl<70@GTkpHQu@2#Q$TEeti!e{NO9r3(ZFQ6K7csJqJ2)B8qk~FXuqw)M%{dKnJ@a z?u6Zf-V7(spe`my#x2l=XSW1mUw2yC@x*VrRG%k=&MfDT`x7^gl7Hv=PbE&St@?Z_ zbVKsgtI)7}&#I_5@?@y@pT*}8@A(^jz8X5j1qD=xJ^1^aPrJ{qa>WHW*8%>)-xm@8 zLrr~N9Xj|K>m6`n^jSunmjuA2WWACzwxBPwy_fh_}3`$&>bi4)ydN z&)*kn6@M>r)`dpb))P9jT!y+|Va;bGF2A36GryZW+0SG?BcB34s=2g!iP$Md=O`r|vX;JAn=y6&Mpffqv{BO^;A;d`@%^W|Z{oTv|TKNU*ohAO*3IB!fH1PdS*dfU00`cR= zG{;fd@1W!M_~X4QF%JzJf0=kqy6VSe=-?mt9pyS5a*a6gkLWqjfm2SL|EgQ-7IAJS z($8nmnH;B)L!8Inz4b0}o|QH^(oWFvdc~U0d6(-U@hYnCCChf>-u&n^Mv3zT{Z>CA ze)Rx-d7+bj#kvyyNLca(@x~n2+YvhW1Mvnphwr>1&bya-4s`Gb)`8&Md;E?#-y)lF zne+#A><@=gO2iX$w|yjDm#6x2L1&iB%Xoc^_YFoi`bwOb>Ku`b|Di)({r4f;vQ1nv z%C&EY?@6M*Z~rm+|C;x@qv|W$3v`Gp>MO^78SngD=GYw9I4|S)`m-Yw|6W@CdKEhO z7vn6-xwvw4;xs6t=RgPl*6PUXsMkqi6DOTh?_cP+{aU|Uu;zI@;(6Hc?7mwNc!Jj# zCxQ|YZ(R+2`JnSU#rN=VKNxmP)hikCx_j#F1|96bQ@p=^XhbUFJXPO?mT?MnCdWEH z)vTC~IHOCL<&o{pz46J)kMrJx_){wX7rxV|B-YDA|IR}Ez~){hC@l7iDpxK|{Gvnj<%LfAb*hM0Qb#LK 
zyjtq}(Xzil2Y=u<>TMjYM4X)y_5Og)U*8iztEW+rxBdkgYNe&Na9VLIyUhXbbQam+bFk(*MZwJv?5*{Z!_MI z_JeM3Wb-@fh!@s$toCr8>pQ*Hb909R+Y-Nq%{d=;&&}Nt_m%>N|3Umu9nA8}_zODj zhd-lIUf zI+xs!`16YB{RbV|e{;ppUoOw3E#7lo@fh~kz`x_`PyE6k^y?q!(EhWFeBSE&1Bmmw zpjl4Y{-Hzr&nxzS_U9WyoGfkh{(+A3GuHY4#O>k43!S95pL_9zm$6*D2Q;g~XyT7t zX!2zo4juYc-Mg1_m;Y^=io=~oh@9sG=U-q7xH|2~5_F&da0*{`9KewO>Wt6t0|-YgZT$bJZ2c=14F z@jhGVs`AMHH@h}YldzOnQN*1fRf>H&eoi5{%?2XyGa_zlkS+$)Gv zdiuZPT;H{tIE8HLg?}IALA-+U9B#3JICqch%LAPq=fnHW#F^LKJx+jV_ublUC(eey z^c?6=9@G={Iy80{=Dvw{LZ11w2${-{Zrz_v^h@#ojDI+zsJkiDB=gdKVK5>tUCWE{Qw>O z@VNZ@c-`w=bj6MMjvnkcI_DeW7O{C>1v*UT?!|f5bx!cX$i$!9 zO3!z9o%2C_|MGCp=)|8f(&Wo_3SCeFbAR8?uCGhSCSECZPCab9TK8GjMvYILwDa`i zVn;P!qCDNixVV2`BI3+gq~|~fKV$wvd+D|)8F8+y)cXOt@Zvn&O9nr4#Z<)4zeMi` z=qfzQ^S$uU73!S2^?sz|PFmtc-J`EJw{M4mc?RWLJIsSP-_$#%(x1?o_WO}vSi<@4 z6XmlIrZZCWE1E9T0N^o{M(`W{sJBBi8uguNFKEsaeQCtInaTF^CRGlNnMjTxliah(3u>k zf%6{VJiPBDPRbP~NA^4DIBxJZa*6kwytmgQ-ruWDo^039h3#jDakmY>3%04T7xCAs zaaOiR=+GX~k5KQBwn5V+Mt%yBnzILC5XVVKf!{+ud$;Bi@$dCQr5-=uA6$83m5>{&veFJ&Dsu z%~NtdfDZFP$^%V{m5(;nH6P$yJpA+5u0F)QtlEq0H_%}|z&qP#SN?G#^6`1D_K0`o z!4GKHpZM2K>gx|3>R(NM_a@$8;zYk|a%BCXL;W-5t}|_&|34}o@is=L<9FW{>=;Je z?&`e1tUq*Nw@+&v{?{L)h(B6=&rtH+jl+>QhkbKp97p`>>fV*)LkIuWP~*wAGXqcM zTt2|%Kb-ppKk2oJ#J{HANt5jtI*ccHHx-;cK~spc!sdN6=uD2&z^gE?dM+nZ2j)L-&%tNT~dp6<4H zyw`yGj__DR{6a1D_Jl6H{)%%u;1ABcf%xZb$`76NSBvv={nGSa?(!GTC4gTb?iS+b zv&j=c2Y*!%`MFdf+le#P<~vf*p+DhWGL+}S_}#?mr{2ev{Rz78`csY0`-qp<=KF*0 z_9u*6DDUlp!Nfmp^W7`x;Ga^eKMnWm_VQ4+No8SIoa+I7+-XWCz$vt~I4CE6~-;-_56Mt}Zvp>mx4;|XkHz$uD zt#e!^P7-yGRkkDOlsgRA59%8DR+KABJ^o(u<|)sJ7vE-nhR)=8 z8Q;bE$-0eR5yw#Tpp37eLwr?nLao3iZdVsPAI&9p8TO5)tQ929qPlP3Xdl6A(|J zoJ(&dCH`&I?<60(@c4-Tz<;zWCGjWO@S%ghDk^{7IaH+S%3Nz*aR|B9tdfk1WJFqVlG)UqZ*{AAYm%7L%+U-uCns;xkdvcO=`W}<>y!;t}ouE2yx4-(~lp}!Oys74gZ`iRDw9s)qPAk z{z3;o4_wXPO`4sn3~}P9?=(ukK$ovcr{!h6f8N8IC)V%ZYE~d#@O-^rpbM`b;`_v? 
zcdsE;h`+#3&xbB-zSCGK_PGijt4{pf>ioI12XxBqwsBLZ4Yi0DF{$3)9H&_KX+Dms zL!8z&?^elun!FoaQ_sf4c@ss?fzIT38|%cq;(pbd6KB~y z{rNZO!uF^2{9~>pKEz*gSI?L0lPb!wK}DBT`4FR}t3P8Mi1t|HMr-0Hd|>irKZFkU zgulVLll2ectbeTMKnG46an7+%lP<)mr_Q6w@<0d9A@QA-6y16dr{YWfxl8EaU#xpj zuQkVd5vTEKlOxAz=)(4|_5In)jrtOQOI*`#vcKBBKbul~SE|UI{=^%ZSI>jaS^n4i zEY|k-Ebd_9J@+?xvi`FDwN&k|^x7kJhbBB=+y2&08bz$*p~q(5Bk4|FESy6^l?=vv~8wRs20y?r(7c}K^hjl|EJP``e1Z{OT$ zY!dfmZ#CLV{CpWrzHCR($+!pm4Y2FV#5;+1z~*}s(7`{*kAjo#^d91jxB0#mbm%7~ z8*rSPbIX3>^j3L9SwHB4rt`jJRdElXyWb(=RaE7YJm`3wa~jV+^LoRp#4+N1RQnQ= z2c5LPuUH3eN^y#KgKOy5?a;ygiNw5+{nZ)bthC9GLkAAd)4-3TcU>UPz4H3&K!GZxH99%CpLG&Tjn};nKp&HSfh$>qm!y^Sh{T^5u7k zU)m(3u>kkxZNmecs^-ak{GUNwzQO%=U$Ip-!WFTJzo& z%GEOU3*zlC^y39|{9cTgap@56r(Zhxig?o!m^_*9gbwTYL9-rqoASM4Irtg*GL-ZD z(09ZQwaGg{hrCS_)xo+v?Bn}K_MhMR!uksQd!;`Tzq!pg2Oa#3b5h`}PydxT+0^$M zWxIwBoO)g23bq~*Jyc73@ z1~iXOyhiGKQnLL)C&#yIVtjKBj7_{tHv2iy!CvXaJ@KyH;}d7II)5$g4jt?j)bu{R zZ@O?%V&Z&DqVEsT$@!qlLq12-V^nhD{q;y6*Fnej@-~qFK>Io{IW_T$*_?}mF6_9@ z+o&bJXS$$Z2I7YV>ElM|!sa`TPaXNZ*V?^Vh<~w<-X75LI~PCRyAt22OgAM5@#fvs zuT!9taWnSIVc#Xk@(?fZhRKuj33TDrKUBmA$D0=*zGI!fywGuba2QyRpuAu16(-(| z5R)g{J#^?lxbFqd^&Z8E^HzPIM{=M82lw&8Y5$}YaUR*+|AWrt{J8H}fzJFqLwO1c zdk>n~o;YvU>g^33?45DXS$fxg-`zin6Ka#cfKHC<=f!(>iGJ@!yb~48a>+OYI@5nY zIL@t-q9^e@y6Ab(ahz-I_mfum5${+(lPBj1+3!77Lrar7%hYS1f93!U%ZnD&iF?IiStFx=aK`R$?-PUi2VNdc|(cwRGl}F^@T3%dC__v zC%EcJ;$KncP$mE0=Bv8bA4~jh>O88ngUnaq{0i(@KiWj%v{UCCBnLY91;4@Ruy!(W zj@sm_paUnKcn7(0v+2YsZ8M%g2miv3D35o%*~D3*-oyU4_Q!o%@LS)VNBjyY^y40M zsIMG9m&{&79RFl`4s_t)T|Ja%tp8HtOv|L_KnJ^FUIXVuk>$imqRwYXyFnLTzd+m$ z{^u8~h@T;wSx(7^E^NNj!1wmR@40*(@x9f(NIA|zC+A7zWx;FRZWHk?s&j<0e?SNS zz+d2m6xv3d=Qii(paaKy$z7V4?)BJ1oa{CAeufSlPnk5p}Pc;M6|`&69hZB!EHkg6RzO5B1r^AU8&k2F>ey?yZZ z@xt-{cEzEvE9#%N@k!#JnXIorbg(1V9pF@XbDB6kH|aUhnH+E9Pq9v&HRU{U{#Nm< z>=)35*Dr8h1?8OE`!eynsW?~m3+Q;A;52TDyve9PuM_Xm4YS^o2c7I^um{R_JH>6{ zdDz5%(7_+@4>*(d+#^m(n|n>r!5`S40Ow2hN5tu*zHcx60bSVs@HXlSf4495jQI1S z>c>Io!sc7Q7k%>eOX6RSp&uvY_o6XBzz!aPZ-|q@W}br%e!=<;oMl7)A%kEpz7hXS?*GEKzTZ)7bHu^F=0UUCe1}EG4LGj~ zJEZlBMVz)a`ybH3FR{cv%J9AMh?CDlKYl?64)VPy&xYxVh;vcpt)zdUv-5B3g2{-p z_u7BSS6Soo3pY{{f5KZmAMs?^?dQk253x=kI5jQt2dHyQa$JN?j*D2gz^-3%dk}AF zV!eN$gMYAp22PzsS%~v7zn%jfIPdoGe7`M14&sDl)pMW&2k-KtJOhX1CQdh-yc%>S z$IEyo?!PaJke@hL)w){7AJ8HGNP4JRw5N4usr?!2ch8!QDn#6yHtTD<@17yAi29CA zS&VqAX6Wq(9qfkw0#1{SC5huy=Rd>lXHEm(y$2_DgR;aaaoH@7Y^Tu4{07b~fp_9t z1>%L+ypI5#j4#rQcSMg}sY1Md(amvKmJd4Fk1+0`e8Cf{6K_gH{d@==?0-k>@5F9a zi#T1?exS5JbSB4XWESt0FFjI+IJwn3Lb86)vHh+03!cnvK)f0UOn=IF#l8Cl*7IU1 zd>RwK(>^_4o)^P?s$nH^wn$U3k?Xt|?nS{r6{9pK{u7(`y`Y1Cuul!nZ_8T}r|)fj zKZDNXcpFc|I{~qKwI)t3m0ywegf6^!V#3Vhw2o_*wLS3*s&nGf|IqP1&yRhm+I+9- zYQ#>&TdwYHNFH=s9Ok|%?D#<3qb~Bi3-P|G_*AxY=)ggI4u1?^(1SP!ZSqvmfs;ez zb?g7un>gcb;#uh6SHyuR&&SCA#5tStzr=Ue`G4N70mPq_LqBfW&Hq@Jpu7X-4k6xT zoBNy4!G48BoOh?m2;z)X_o1afp@aPp2cbMkl8+%yLv=q}_6z93?ib!h2eB^daB)2G z+o`x#@}Ucx?=-H(;`0N|*8fHP1S)?b`w4XNy(r`_VUO#+(}*|I<{l_?@DIi@aPk$M zMVv*o^z##R@DJ9X;Eeh_mpBg_=-W4RVcXAP1a;u^1XUg{B>tFY`f@^NmeXl8i^_h; zw{8h>yi|RqAE1--JJutpcO>7xi5I=DzFg43518-4Xr{;6u!H~Ioy19JGk!q_ zzu-IP;0#+BM4bHUo|_!Mpo3p94uSKs=>g*8Euyy@bm930e+T}Kl!uAGQrS(8U(kik zcNq9iBl!2O9wYvBPn-6s-iLxcv91HBSDFywIBoh1bl_l~0q05fbHvG%MqeK2;Fnt> z-YR+LB5`Vb*T5(My5E1(RpNW8_~_sI1?TZ$hg}_R690Kjy&a%~J@M`wIO)^f zCC(eG&Aa4fF@(T}$^UckC8|GDT226TFoEbL#1-kJ3f_)wE-+p>Y{Eij${SmtG z_`Ah@%vG!35-o@OpQ5Rn&_bT2 z`5ZWEh-;q$^9uNTonMF_q`vba$35uG_TXjUeIsx(M>2-|ns@B1$|>_C&>>IK@P<9r}Bp?j^eUJ_>M+cgw}T?C)s`5w}(}{df%>#yfmhXvocG1PY{w^6i979z>33O9 z=;XQz@gC|uAjp$=e{I&cZ|KlZa2^(%a^q?fXX^$%2Rf5uy>HgEQC;G!Q~T{Qj&pZ? 
zdrpjZ`LZ-5e#EW%azcl4{wey&(zlI?6S~dh$Z|r5eo{?2q~4jKZL@4z?&>Gl7l1v- z>~Bu|{5Jal(7`VqMSr`#&4)O*Z8*@OzF5DYJatF5AxF1M!P5 z*ZT!Jlyk7~OQp1b5~trPy=mBq^FL*^D|r$1lt?C}-D4U5Ovb=KLvi@XG{o z&wc+!U*dS$^cUz%j?+jphwmpG9pp!x{;I#o@c=rx&j5d+ejj@FBVIlA-3ZyPp$ps3 zPNR$1KTTSB5b<-`ydw^s9KX=sQQkONhY>IDQhmFD4*tM+2F|9JBZ=c}6Td)ba;*0N z8gCv;obKwqTsiLJ9)J;7-2?dc>DijOQ+A_#IKPbYW#~7NxHD|(2_4!?mB^fzD^zs~ zaaOD_?Iqg_bkhD)#rSe5!3^TXUZb}^bf_Q38`Lji{Jbf{m?I{c3Nqf_&V zjz!>X20IIbr`t6iE_P}9zeW<0s8tuXO`>7_bhs{-CuYI5-0C6lOx*&bchE$ zoSA1Ym{%&uriL*k*E3&@Og*QIM68pC0r*0>HFEtOz_5&S{ zr(Q;H@t#wl_io~CoUQjebhVXVW~2>Y{pyYz*!)^LmDT> zb>eigX+O}}`Q0!&c7L~y%j2s&CjL5= zUkPtqp66e&^s)R!)H=;!3`@r2{kTWZiJLW#S#IfX=-?;BDe!m3wXcX%^^*R+By`}6 z75%*C_;wyj&?2m#ok z${6}<-ZNQrefw}PUUeAAVm>k*8pu_xs zvi;rBFKUif?abT2d($Xi3D8_!-%RiNxf*E&@0Go-2+kNDqg z@|e)U--riL&O!MT5odaRy&a*0zcFrrbNFFW;#^SoZ2#SO<1~thb2}YprzCz0mG74G zKXmfk4&LWNc?;J~OT1<(?=5-I1vzK^?_MzMh;yIdRq*s6UTI&w9iam!o;WWOAyyXR zd~K=cKxgNVkb~KYGqZ!f{lSl6kI&X~S}z9XCjJ#wU+Hi6^50Ga-=~5--?Yk4{I>(m z@=O0fr@D;+yW;&5@O-iqCSLVkdb>gg4$jwuGwOaZ;>5Js&wvgbj6>k$JY0%6c}MEY z1D(ln8uO;``Ln#E%MmAwDvz`ibf%qdeL10n9}s`R5614=#0jv;FF=R- zV&4^Vcr3!z-%AltWkL?881Nx4(_jm+C?BDy#MF9J;Xm;xy8V@4xRz^C$5eE-?ABe?uq7!Chj#)BHhK z;?-EB?>ErFe$~ah@mJUQ5~q<(-U~XD<7KQD?{j!e_9IS!%3sU)3%Y?-RbFnxh@6Rb zj#qg(r-64i;FtVO`w_RIjoqN*bMe-5StTnDB3|;vdb`PUSu=(GGZq*|oU1nLdgx&P zXkz?}`e7t-4v*582Rf6}&Uh8Pn7$ADaobqpR8`}2R5YY--~aa?bk6ik{-+Q7`@K%% z_nACT-RVD({+-Keyq4`4y7b()jwo8p@W&=CyYC2ha=C;Cf-8zy|VDetC z^23q?omn31KAvMj0C9HPj92c>BTi$;ZhpU`YLh_X@1JM-McNNK&eK`rh(;M#5U;$- z8_Rh~#u4~_)}Cn33pI?e$rVRn-3tF?c(R)KBW&gq=-_Ak1}ENz^~9NAGoL^Q{|r9L z-)nw7WixR?ZSuy@g*~5G&w00Rv7PwE)Ho*X>0X@TWwi6;{EEkKyNTaOwRhPsq06B< z(CHR2>Ls2V=<<{7N83~FBkn&o@g8*W6TVvqKOMMzkT`E<>c>s!;3wDvob?-y5T})j zXa1cX9Y#BOzGd49;wQ9;m!LyA5oe&BRSujcPLE+`ePubJ3%flzjGF3vEBF=Go+p0k z_Ii6lhjJoc3(mVfmx*(}iN2iBA^ya>*t@ol%Dtu5&v$T;=K{a(!t2BjXs$0ObZ9^L z4bGL8w~3Ram^&PABa!ImkrUk~&WbYn`a&1BU#$136CHd+{E%9DzI*HcAKyO~?@xFP zdq(`(%AT^HLx=hIantI_FRZ)dntwgT{9B=BC~=##HOnpM59s9li@XT@b?}e3#JjHY zJaXQI4t~da51jg0KM@ z;lm;g`!%l{ySm;E(7~RvAH^yWjW}M7^&IHH!TC!+p6j zE^$t)e3oo?(3$NHc@*op!WSJ967Q%@{z`vt&0$>KzbbqQL@qp0^!d@;`Qxo zmRH&pI`{|qXV^95L{;J}u<1|G!9Te71CGxGPvUH~=}*vw*WR(t1iyIQ+Qjd#;(XcO zp$nVuFpi7wepIehm-tC+^1aZ(o;a_8a^{NNkT?-+zWV_kIEW9xIe4luaeCPF8|X}q z(>NsFxlB2wIdL+pypo*1pbKyQO4iticz!nPc>Vn4Wt0^AtYKQ6PDI9w&>>!2 z)24g*AqB>}_E|A6!G6V_wkPiJW%~XM9mdCIs^evDko{YoOo6WP5&P`md!On={Ktwf z`yX`h^FWcmsk*-_aSGXtv(T9wYkZestS@oqsCFm&2gbdy`-gSi@WSaw{B5c|O1``E zCF~A+`X=p1{3X-f^H(wDFYhad;>RvG)8#MBC*VK3I*|CoZ2C8J@E7*U!RZq)lsL(4 z&KE+5epyR==kVJfBZ(8F@*%QcLdSV%tN#ym9ZS4tDi0|A zF5frB_yqech&Y8fzEkw`7Ig3f&S!(u@z8YQ_^5p;Ssv)nZ_qx$xjt?-as1SNfV3NQ z;q@EX7yQ=^<`aLL$|FiXbYb(Y@5*)X2q6A+wNE1LAn$P@K1MlPKU+$i(l+Papo3p< zZULO6ua^^Niwy@l^pB9G{9S|1i`Nt9f@*KFe?TYWC1?2eaKxcBS?{Q8Zb&5FO9+({2kKB8Y z(>lMseSC)bSxmo5zI)?Uhp+=(%#6Qqd-~OP3Juwc#FG(U^A9;{$26pw~15wgvpWd8+3@@rd9j=`<#{U)OQrD{=T~E9&xXS znq27*cm0j=74^Td^%3#+t8O9sGklkx106VcZx|epHD8F+_PL$|9XLUEzteYc>c%sM|C%rK%%ist zbSB5zZ#@S@B2L5h`gY=8zqRiB-ufDq_@8E*d^xV#?fZ5R=OePLh)KLl!TNeb2m7Hv z!w(^4;u2@H%{gP}V87qRcbH#ZNI;yFAN2NvF6@3_y$2ApR}$j)Poj_O-Ma_iFmA-) zeZ{FYQxJb+9(_Cs9qfs?5cX{OIW=*P<}ijci)M`IEQkcTu_Gi$Gr7?=)(4o z!&s4!`(Lca6^MW6PrV(WgFO-Np`71~R3Xm8K6(yx;NYGUI6K}|BhH#0dJc3Z$7vK> z#`hAptglI&%l%A_?0?X4yK@>Lv-#d{|8Bn#&lsuaLC5D`tmn=Wx2#XRwG;LAl;_Tn z-+;Y(W@$v6Cxi4H=wNs3(}0uwVN>GF7@_At2fHIr2+pI49f{+m^8C{7(3y6}xMw}r zl%Zc2;`R8VUpKg$H{M+EEyXdd?{p{rF`Mt7Lx*}}yh6RRRq92Yvugbz>kS>ZH-~|F z9=t4{`w;KEw?58*&d#st{Q46oM<+dp{c4Q^hI|@CoDjACmgAgzae&i^mnWFsZ#_M9 
z81XCZHOndO2OZnb%Qz|C14*7@6!8|`H+gctgbwSPWRr47$l3F}%0pVut$AJ?)bB#jdJBV?~3nKhV+|A{6n$z{SZ3%9q(14{Y>zfLYyxd^&IF- zj@9qJC#Dl;LkWF--SfM(f5(_VpZN3Nn0}J>bgzFqjZtFXu4CH(;=lQ3jvumJLC5XN zVIU6-yOs!9O1udX^!*7s_y_w%;B56-PMqmQ^c?8G!Fw*?q&l~XIKK7u9Oz8Wk8`~B zc)l!IYaMYu4L3Q`PSCNPyp7i4-PsCfHxe()Mw2J~3SHQ7i__>M&d(d8w-SHKRegD( zYQDAP{jjeDfGBXsb`RKY15w1+rdBJ2AdbSB5TE{^c~e&Re%r|);}#eEun zoI}JvSoFW}t@pfo-#yBFb#G9%C%b!IX!mI6elt!I?~X%nSLom$#P8tz;dzEQ(HiPG z(4oI9Sh9@bhPe?g5XW1cH<0y%&TM~(bAH_Sn#Os!F3&F$FOG_PB@a5u%Ok$)oa5+q z;w`W#A9Uv@?_P1-^<8Ig1Lsa)w`r?x6K|BwIs`iG$Nr`|-u`+aY15@$xCeg0yRzVa zZhfElRc-tN9XRMm;A~F)m^gp`uJZ#{I2M+odIBCCsCQebC z^#^n&$J@X;5^(O_`A(cO>KvS$FQ5x=zSuk_Dfx5el1L+d%~zIH<&^$}PWlt;FqC&| zn`p#qsn&xs4*?zQhqwTo&)H%Tr65LrA|s2fz4=xMI+U zoW$||O&^~?XL79P3}ST4OPn>;^>LYd`zF?XpOTLY5Whn&J>R{3ABXWwob$^5M-k%3 zTB^4Lbg(DZIq*yLJSB*e>y(}Y9XQxe183-g(!{xWUeAHf@U#C`G1OtCysrsM7)FX_45~WlJ_JQ-&1?F*g?Eg+4Vf=Rv(<0u;7sAYQDF=qtUEa z4dT@(Yx3lHFYhJSRQ)b`#B=$b=_b133B1PwzdX$5Bz^~V4@34l=-^-UA8=+Tt4EwB zwe<5ObSB5z@5YBVB#w7deSO{ScZmB@&RR2?5WiwVJs-O8=Fg>K9qnk|p7@?wO}=cO z&?z^$-fckKhVsUX+lhD^4(sn_9zzNqIwhyI5+8k~~%yAx+*QnMe+ z`a#G2#k#&4R=XGR3Y0Z@vVFVf4=caO{yxMX+2gHw)jzSG0>4(l!Nf0m(&Wqj2_5{8codw=$A%H7dXSz29sD2ApeV(M51gZj z^G?0jB>fK^w{NR|{Da35@3P7R{9FGc|Al(b-8q%`*VVgFk`GN{W^DOZLiPz9q&x4M~6NfRa@C(W}lx?@1 zc#qfU+c|Wmy{zAXo}XhiaiYC8InuB4JJ4lRg&!RJZD{}bb<4sZ`NjV1h0p7VpY@}@ zoX~-TeR;I+lp{A0=de20Ajb#jP+zonaGreMN}S--`gsw$u=|O_*dX4wKHYyO@zd8d z`LdkQndNjE*~Ryp&X)@!&M80r{0benKZk+%67@cGen0WL+pNo>gCDkv{fIJu93sw` zh5GtI2M*rzMtNdJKSrE=d-WXXV5c%`IWKem#Yy5sx}modbi9uDG9HL`Vz1sfL%cje zCQq&_p~LtVXUM_&8Jp)<>q_fB!OBS&h}(0mzP@r@KUS>!qI+E-PHt5`*}kBI{l84+ z{9^5RH;6OvpuRlN!TwmE!;i~O-zLt}GkW_&C&#rC;{M}Y|NF$7a7AA(=*)6?8;Cnl zu2bb66X*F0lOxA9=n${q{$rX#{`VfQ3~-HWh&#b=pW-?3%fHi?6FT?-egS9e;ZWiv zf2HR@hx+1sJ>ZP!^_DpCBk8{z0bSVRnl*lzSn&h#J0{Wd-HTtH2EHqc@*ny1nfPat znS43lLMO*N#0B7uTmPMS<1(8(*-xN@fAIZ3aL#s$H1gLx;r9Z14s`Gj@{-_$RE|cR zab@)OgDyP(V1E_-jyYoyf3f=Rxoq#yh0S*w@5Fa6=EjOg{QG(I{((;V2jd#boBMD= z;_b+yw<~n;59W7pHV;WkoT2&k9O&R5*a4iF<$pvN^CB2mBA6wAR!Z+5=)$(2b-f=e zL0aOsP~WkX{&8=;?=%c?t}gYP48&ih__F^%C;f9*e4jS)xy-~1Q0-OrAL!s8_zQNu zbT2z`o)*^o2Ris?t$0TxQDAQ3#7L_54|HMs$7#f0c7Wpj+~e{Sf1>)%p0poy-fujX zm-RMOKErx0WI)ft#G8~*UtZ*&G5=&!j+wZ7p3mZM+gQ z!od#WPgmooZ12#)o}R)lJLuD&3?qmV>91F7q(xVMn{qF2`Vs<_<3#SW9ZCb|L1v-Z2dA*J~E^4P~sJ}Ie!2h z-&1rNl|{U;XyZuY^{8r&FLJ(vPR0v}cVW++9mW!`sk#>;+X-~=FW!L#XR+Tz;;i)0 zk9*L;zoFv01x3nCCC+S{bD7YE?O$uZYuJA#@mJd9!`*E^yTty=w>Wc&-`XZ$2%Yo~ z_OW2ss*@HFuhlet9u7MAr-_)~&ZSyRobERF;Gi=(UdHG%oPWr)CXhIH@0jJ0c^2r9 zXXzL??owz5yq9nF%S+D{#N8j!Y(H|mb=NN$ML+ZlT}}MuY4rUDI_`(w#$55ev`4eo z6K`N4eR-inoR~>Dykz?9uRj#~dENl`kl^bTW>$;*nSRUfXKH_n!1;GK`r#>C(e(4=zfqmFV(w* zay*1i_BYrM_KKbF2=UU?*Ov=AJG(Ewf1EfIZ0aZN9-62Kz5g3^UI=kUsdHkoe$d@2 zk^h(VjrF@6t%jW=USga3J^Ze{m$Bq1zjv0V$|d69zL6XUpu;$j;M2e!eV09Q<>yY; z=kaA^#B0P&(MxZC=wN^B3&D>wf^HJ0flb~VI&g5l8k}o`?h@y@&AV>Ufs=IobBebo zRDMVtyrUrdGjt~B$9h+MXJcT@r^E?V@1x5003EM)y^J2>J6mVZydd6?`KG<4AE9%o z4zw`TZ(-sdYh8ZCcV}U@B?n#;_k+#*EYQJjcn=$#SmWOlrnZ1#f14+Yo8lYsjk$WhyYrX7h%4sTk2vbrJXc$rcdw!Q^mJ-o`wUfAaIpMEue=?=SOpep2D9#67oK}!H zQC6EASwHB^`u$w51_)j~pQ6NjpvE!DgN}J##v1Vs@qnBqi8pwGSw1zO6`R)UBX1TnKY9jx>I!RUHot?FaH(T8om*b&(=P9i3 z1q|v_hxn=5>dR|)j}~z;{Bx~n1LEDbnXjNjznd!ZSuNi-BFF= zX2i*)#%bA4pyTrkP6PK7P`^&TEs2+Iyxv~WG0$Oyiu2VaYPKfc0X1&R`3O4L3-&{~ z+BIlToV4owFUf%p9K_Y&EDPvFoE*K)`biFS;2aX)XNfeUD{(&CoCk)^ z6PVj9k8J<$#%m#=G2Z|B&Y2^bAMrgDU$zhE`aJKP@Pg~!meZIa;=e5s`Vp_0I%h3; z(8=)*?-Icu?=}u3UPuAGKcIs@;Ae2I`V1w``SN-WbSB4o&*WR;k;FNq-rtb*)1OCi z8d1eO=KFOF@y4n9W0D6Q&tncFY1z@V4zIFp0`bn-+-rjl?M}Y0Ij`$v;`sXL+Z}Ys 
z8`V(`-Q93Y?xtfyT>Fay#d*x>Ij0jpVrRWSpo3o#=fki0AI~Dr3!8BgI@A~a1)Q48 z=Mks(P7Z82H{7_=P$yBEIifeSM)rIdOgnoWJrdB~EPh9;O@*po3qsE5Gd7 z>pSw*-SsZNz%Jm|c>g!?ThG#$6FT^%itz7>=c|bGR;}0NI0_x=i+5{Kp7)#95$Ce9 zo2)N%;rRu53-DX{Z6bb(N&5Oihx+2X1mHApw~aWf)4i2C*Etyv*`eFs@klppbOjn)_g~&zK4mwP<=mL`q#aDhxNTv zk1ofF|Bs66WjhS-z0)4WLx_`G-P@C#sVa_xU+``a{8G63IpUnPiBq71Uyx4#Ct2Q0 z#7Uv%>&!^fMh4`Os##8s=45zVH#NQw5BqshqHGfF|Kqvi!JPz!7GFNiq zjZC56*M$!L!97-Rrp-u2ocW3L9Oz7rm(g3~EuKV3N1QJIkF>LZvg%mA{((S(ySux) zL(su5KKS778iKnH65QRL!3PE(+}$052iNfNf2a3_U8nob{X*7y>#e&o`?OyeBIp=%ar-j6@1mI1 ziC6ZS-ap=z*T8o?@^WN(paUm?;N(l%oH)%@9**QdXLDRubdhh9a(^r0L^9;t zKquqXi$uQ7;!W*{moBH>&SX1=4*j;E>bC_0gBq9qxYxZuN1h@4@M=pZ;(tu7@3+vw zez^Y#&f=!sh%+^co&%lDagKxCBK$#|qRI93^>!S@-=Unh0(uj_L*Ad^JKyVD+O$9M z^BCR@GriY`_s>z@-SGz#uT(v~U7>@2CW-jThEv0c)4{O+f)4HbMO%JHw*Br=#L1@e zROGx09ls;n(z1g2JKstBk0su1bzUOB^9`MM-}&~n0>nMZ-i7|6|4v+$k1Efnpu;+^ zSAq^!sCy;UJxM>SaU+qpQ)LQqN2~pi?4Qt~e>PVAGk2y>k?Q=r-Q7Qt&jJ7Uk3EC< zsZ^eq9G9Sj|FM1sr}wGZ#0fU+BcVh8#QTQetw)jP27Z$si;4f* z@U9nhXs7;i-;#GZak3iD4WU!+aPKRzZ$P;sC0R|pDXQMm575C6IM)GZz^8S@d1vqg zbnpY_eQ=6z+DM#bs$Ix-2c2g>ILG1R?Y9ztmwJCs@-e=8=DV!0pE*C_e)=86&(Otg zM{*p2PWq><*nhlvu#0$A4S6Wg!9Qs4uKsAV z4?4Sk*njv~g~a+~cdetun>RpTF6dCM=ndKKu@avoPW=&jyF=HdQ^LdBZmo5XfBnlb zZ_m>+#LJ-Kn6jNfC*RpPoioDzD;d7Ovw?LZ>==FH1>$cs#7UupU(<{EZ(g4(#EGrW zIb{EZ4jlAnl;=an8^pd= zz*6RYotyple9ueAIR^Z`_u?DkA6TdN7j*DD#tCrN_WD4asVDRt=)m#6&EKzipZp7P z-aXWFpo85oUZXq}j$5OCj8As@s&7Bgg*48`Im+v!^8sOqpCzV!k4lb%(4D+E?T7Du z_*j?+QQpCA!V&M@Z~A*#(7}E=#JeDol0_oUDZ@Q6=wQE&;@(c?3sH#^M%~+&?G8H6 z_H(`~8hd<9;$Kkr`6b`GJVa;y=IqLGiJz{h-kvgl6ZVB)hDJ+BoVPxD4s`Gf+BrC( zS|=q=n<{z^bnpw}=isDikdiosqT3v4H|Rq03&uC_KVM2q{1b+Is?d4nJNJ?Iwr3>% z(k*tsl>NrEkHoxz@@DkUO1vR!^y39|@J~50k3Ol9lQ=V0>p9Tb9G8_u+&A3$CJ%Aq zZ_w8dI$j^Sth@2}eaH8c3lOjGF+C5ulYO3)zSVNjF0>zvZ?M<%!6k@S;(^VR?FTx~ z{&k)sU%pv}_@Vdd<7o068T|+4{QO6G;$$7J=RgO)BA*kS1z#%>=j{SL2Rit*o%n84 zvr$!vbN#T*k?j^b_N$*2eDFBsA4N@8lX$xfaZ%_z`@xwHx^_ox;(v*(_p^8BE5258 zanB@F)B42ko6hFTxC3;>)p{h>#$Ac6k>B%bke>m6rOMohxIGN{fY8D3_zh0zyv>L+ zHK|=bxh{qd{RQJ1IIYsQBF^cbd@m61>w=%#dp^iMHxh#z;-rA_6z6`kL$X8 zX6TF`&Z~1M=R5yjjt(Vm6+fFR+lhSVzoDvl=oLqz*JB#?kn|kLjoKwJG_0P;BPNg<_4s<4e ztsA?DI0x0ap{%dB{=(m(oQHm2M*QTJ_5Om+^Em0u2kG)|CGr1mVDsg8V#)`>`V!?m zvUV-;ZW`jM(4jx!{2!cRZ3BpN)sSZooykA`8MYAT@SvZupEI8FAX*^ti;vOsF~55D zkMp~H>CXibzq#s9az2;u(%?QS?Ad9`Ug889@(rOq_({?E1>`KnG4% zG4J$Ce1tf24f(LpfrEG`%5&+?apEMMtuGIBHpkZrBfjU*dF5&1%vAS$<@^O5=C3FP zB3$g1@sx4?+CB9=afdC@w+}gg`Kj@+RfMP88Wh^>9uEK?W92XrO> z#|`IR5A+xLiIp#3C*C|mJQOrEpa|Z)7RJA^#R5`lygVYkHp_=*q=e?*OVh&=XLvY0 z2Rd*{iE%T*zVO6J@SC0koy~Dsd&Ite%#_H)$#Fx!uZPZV&N$EVv(AY7&qo_aC*I;~ z`u!Yqp8e|F2e`V&CVtVIdcGV-@yPR8tj~@Ipe3vyY6vtN6DErJgIL4O2iT~Ha~Tv2f)+5eye2kRMdS~e_Coaq1P{R|!ajQ#?S z^|calvPH1RYq>v!E~If$`gz`hs>FYuQqPAjBz_r@uXw3aP2#sNpyxw}^-n_O@OfK4 zWOzGkQ4s8mbqws2aaA4Suk~{MgYzPA9{4pN&ZSWLc@aAJ3wcf81czx%oQa0@4|JaG z>0@OO_jXR5YexJPY4zoV4tB%-4&|IaxD|0a892~^gZK5oIXSl-aW>`Cmj^m$pY2%wbyL19liceY+=qf)n|>NZ z{IQMo{(%nu!8!$;^#Q|(bH|Xs3Z2byu0JBR9YvfgE%o*Fc3i9^&hh7`8B6?U-Sm9u zJp0SXnjeGnq>|m6K>TWk{4MBUPsFQW&nJf`6Q@ESygVO6zLH7xmNdLsP{SX-G|hx1Bts- z#pPtZp;N=5d!32-9(F9&V;AuvsQ2@vKcItOG2ei*sL(#*TsFkrptCv7aWK;_2Z=Lx zj=n#5*WX+g-hD+mqdqu7{7(yQzO)~7JP*38)Z*Rd*y~RaFX;?D4?5`&x&C`H{tWTn zs&~4iKcIs@5YI$C2lu@|oZJS#LI)1sg$L(ri7UjZV2G1LXLDTE=xEa^&!N|^H;7}Y zd&1K0(8+g@8i@N|d49P=yt3+Eu(T6&s2}Q!`VBaIpE&3H>Blwbz`-~N&W@3fh|^)P zo&%lDamG0cRCq?5o1<-xT=#f4A2|8jqrN2mTElxN-tk>l0+A2-;`1BgcQNz_=nj_S zd`R4fgI%ZZ|3JK=>O4vM89Mj}>uPW|jrc;G;%dH-9O!J0%Q_m1*nfBv+S9aZ^NW?$6#OBL+4e!cU zQ3Ia;=sC@zpI+?V_hSD9{|sCjjkuYY>H80K@H5`S0cX~iSj1^>@H2Ga)Dm&!6QSZ0 
zCsIv)d7!g7epVf^PipWu5phNu&P$;4yuNeJmu2Q9BYwGTdVhO2U;0^T#6GOpj8w#b zR#?x64*M`WI%HW5#-IC;_sdzg-!cBdU#lvlBYv_BdOJf0f7KUp$krb+5+}Th%gFu> z9sGrJ8K}kIqT_y!G{b=sfd%Er0PY=lb$_iJzdl&6oWX zI;=Zahi-kON|tGA-RZnvf9bb^#I0;7x4d6pOZES8LC0TgKJjF6sd)E$ptf1#9DR1r0`?AC@VAuyi zhjKQ#$NTQ`VJi_Qk>bd5LMQJhjjy@5>e-U!`$^qCRv~^HL;i}4L*qC6(tJ}5;*77O z_X~8WFYfh#6S`Y%;-qb6b7Xy?^W1-&=XS~R)F*z94tlC(L=ahe&{`Otxbct1F=^R*?;&9Qnv zLkB-&egda>#*V~Etkzl5&(MYBXN;%dH;B}g`2Bt=?&4?ReFE@*J<)^sS^DbR5pQ!D2?=-@BRqu^w$+K)Js4eM*@;4f*2E1c`vqF06zzp>g^$o7D8d2SCr7WRd(L+>LaiQmPLHw7K+iFpK^rGfs$2~_)B z=@;n0L3{$5f_qZn>?k^!IN4P@k#>X5v)!EQ&v>Dx6F-HjujG5z zPkpQ>;=SY2`)3jV7egEaI@r@+tS=spoJX7%-w+npaGsA8(=L8TR_LqF1$OH@1=Fj5=fkghW#ycsDHQ{OV9jv_4|8i@5Fu9aqohN>oWKWI@l5GOW5(3 zJ9~+fQQdQsc7)Dze|A~RMSS`2%V6R^R_}kx^$B!xeR5FbtK0}YLcA4*d|l}5dON>s z@}>6);skp+-%%enW zfNW>baU9;6r`aO*BjRPAu3v9?+b?{@dn;+LJtcl&gFm2Se|#HnMO>xio_~mUQjJ%# z{XmEDu#p1KZ`3RFfF1kXaTT10!r!-dz9If$Lq0ilwx7Pm`^0$oW%dW+Bvk!Z#uK2! zc$g+^yok%5U_5lL8{#(pLfmxfTvFEGyLE$ey%;VgeH#d!m7U9)k}36!RK5MUF%w&L_1` zmg@`X!09IDt&KyX5odBIeR-fW_4ndKVi6}|ADiP{fB!aqi2k^$bUfmfRqyvWcuMy^ zONCxb1`l%QHDf;mdtLh_5%Hq8*Vhv|*bDn=a83jzB~E0+Jss$5jp}9 zYehdx^e_u?vE7+F1;+0GK0Zzp>&5BIau9dPIK982gTFh8`ymzE=OIof z!~GEG;BSm;XgBe56(G(ywf~g;@2^p)AD@gd|&1D?6Sm}Xjn&bKliinJ#f^wqF+Vg zU>zy#37uzuIlrSiGG!IwXEN-2`JFf)>v|bpM=t-nI&osEb&H$_phG)PCHBwhH~11~ zfMNXz9XRj@>~?BJJ>sM`tRJDXIWB8gy?gZC&Eu2&h|@-W7eJ2x(8={9=6#f_P`#$a z+ir-HK?sbWLG!_5U{4WmOmXKApq&BW@D4&yaqE4p`I|@mSdH_=$nUD{qLGLkAA> zsK811*HGg4tN5$58+52&r^eanTxxr*k;G|h$lriY_ABIpqg<7e_!DoIVV?t?=YHjk zv+s;Nf%w5{JdxwDjI)&NJWFdRm^x!Q`(O z691Lj_sf0>-T&F)|NeOpFHzye$q&2N2gu7tx%=c?M%-4b_3Jt4;3urZ!HJi36>%#5 zspmjvb6nPI@md_@k~dTApGG&|LcC;Z z|1EjY*d>+)JF2 zhI4Um??HD7&-sk~mjx65mdZ1d;|_E^bK36)W=XQ>e|eEFb}hK@wIA&1Yuy&_IS=b_ zg!nI&J!Lvz;JreYFme_Jt09g5SUy@#-{jCK%Qq z(19~d?4QbPIZvEchP+|u;BV}MP@e03FB4~|T7O7?K<9bfa=ufT(eFC(0}T5o@6OR& z)>hFUW6r-#{2Ye(B6PApVtoL6gr0k!c%#&P66t5?;2+Fm;FPZWh&V^9>c>Oqz`;5X zoVn4S5vOZ?JqJ3Q zF9n@#PhYE|i0^LB6l&~`anjAIoU;DVq5k)dPp)ty+Z*?H=##xZO7W#xe}^G%BSZUx z&Mu#GpPh7JIO6zsw%d)gqj&pkKP!@m!`EsQiTKY{TvGC(L);>p@>7{5gXU&vY+gSW zNEVIwABXANz3e}@cMX4Ex)qZ+3DkTe`!jT~C&oK)x-E`NoJ(rIC+iEH=W){c-I&(n z5)wb68i!^7)PFa|`F?Bi+DVD$XP9T?`>k`tIppQUDT&k75Kn>5wx9DH@_6~Q#2IGD zBaq{2&RywfpEW*4M&dl~sBah0+2#56eO>XLp7F=C5a*4$*C*`_9e=0i+jn$g@%nt) z=p4kGXvimn4t7GE0qyMds652+F|0G70|$B8;AE>*fH?l@9evsEp@W@p?+YB?NJWTq z+pyk+j^_>MyX4hF7bjjjwLX*m$lLSZ4DI+kBxClLB7R&o?n%Ew$KN4wS(b=fBpOhb zcpv|;+neM;r-nE8dLQ>9V9$+3D-!RVdPhw91v>b5n%Ec5x2g~)p&|baI-BG4$At&g ziIYp+2l{XR@UaS4UU8m>H5<`}_}x@qj^uk6 zm+-L;i+qa()jAOWg&{u&I@lBM6TuFB8+0MgzLolM9XfEZp9SY!x*o(CWH_IK&g7R) zPkIq&&0ltTq}{ypi_3Z;;vBWs^(B5&Lp&5Z8J9TpoadiNJqHr6gCYM2Iyp|^UL@=o zSac}yMlI6&13LHv{sAX;xRJz3qT*I^yo1i>xUBra{ND3{NdCm}Q*kOe4nxQD(6@61 z!FzLIJn>$uxUb|v$2?zapSV9VVb&z#ZC3G4xgUTI`+&Fe~urDg39M^14 z#-L(1HoEt(n7`q_S}oTS|Cs9M(tpsQUm`yjoUd5}i1Wd)FM`hIIM)w(UvDPPShYWp z{_<`;=WGurw{9o?8#T{LzIWrW^L?=f1A>S@-*7%F-xs?m&UrVK-AkMrsy|4-KnK4d zuI~0rj9}u7GR&vY!7r#kIIDj-LY#_*b`G8Ab%l?$Dd%fCPic7KIPt3)&Ml!seW#V= z^p9Tb9Ow8qXX^vv6pO4MpS&CYoblLX;~o>g zX=nU*;=SQuEuO6zhI%fAssi>^SFupRP^_OZ+B=xCM0ZH{yWc6m*3r&Thj! 
zAn4%lxZ=CLSqelZ&UTeYC)+c0yiRw1*X-oyXv9m>Ti*`(yJpV#UhlwI#3^lvBgpt3 z?jgcXPy56pP6YLCh8%yPgT1ebczCK}iHLJf?Uy76I@mj%uy@3;$%s?ZkVgR>+uJ$b zeK?Ymc$E$3{@(QmAM3WbXJt)GOZ;-`++WrkI@k~Q1>lDrQ!)~#wqYKG4jg<}0GxX{ zvl3^YA$|y*&2iofSa&!Fah@6S8N9m};A`#rjn9E2j>tp&4hQUhAookqWs5%Pf8T#= zQFOomZTI-+vQ~)m!0WXO5cju}`tcMxxnIJ57WV9&xd`!|pV0SL=-?NeCxElfDoz~N zaXkk*aBzJxfop<3hJ^H~D1pBgc=3ULoDv$=Au&00J z8pJPaSZ_lIzaSqBoMrKA6DPY`Z_9B2I-BDhub$kmN1S1X{iAo|m9KR*Kd;;82KW)b zMS$K<(CtcK_d9EJwn`2CE310|e%3hQr!xne5_ix>y`P|ipNc3y{qEl)U$tjz-F`yc z6!x{Iwj@5{HPXJ&!B1ExfK#%5TjDHH@ove14t~Nu37jV>IufUknx|yDgf680Re15f z>+S`A5I?11K7h_Hr^`Ao;(|qb_aV-WZuY)Gu3wGb1+%%7@PyU2>3!MRl&(|K=iT(#-MjgU`YZh3DHz`o)0YQ2_#5K|I7yx^ zBhF~m4*%OY$J({f|{>(UbIpz2O9p7hiemA2{%yq|=19Ln=eZxdtg{FByyUab0pi!rruPGMJbpRjuuWqeBVMS<_IN1Ei}vfeyw3OW zK1@DE{K1Fp?<&Z71@Gf!QVl0&fo8!S6C76S24|l8m#1fmyKAt`mF1Rs?ll!ZPvh4O zn=JkQ-5tz7@Z+`57l>bQkIk3kI&^4P`9=KrZmuiD`I1FHZ$SqR)+s1Yuin>*^L3z} z10C8YzK;pc#?-foQ(~Ro-_UvXw~yr#@#8gP?h(K8etkKiv&-qS+WyApp5s3}B+jZY zHb;(M(8=)z=On0inR!o$cR827e?td9;NBHD!Kq&mCvG`C2RhVm$yEMMddAhSh=X{T ztRHj}S067myYLjXPXA^vkq7#F(RaiiQbE0f6YXm;fz@kZ>>*AqIlH{@@^ zUMc(iOPpkvZI0}x&|&{jLpikK{LrNqbpHPC(|Zwzubw{i_#fkzqhIRF2_5{pUi8zO z7s3*!rQtjC(4oG|lJb3&xue1p=VpBUcgUd&seeTg`-e7dBNP9x^fq6POVFYHjTQM% zXVOO}PWMgXVjMqI`{$mI&h-QiAS6*dG#FV;0Mf4;G7wd zh&Xdg*&NyKp!4hp=YD=*iDbl2Ur5jQZa?o_R~P#w74cJ))br)K8sB@UG-yKO^bdBp z*VTE%dS~GNw8Sr4QEzAH;IA^G|3wJMNSr7&^c?8mFW3eC8uUk2;`BB63%Zc}g>z}} zhxz6teuwgUdqU@#@3LZx{`BXRyu`my!sg5V1fA?pIIlo?{c9H_-p430^z zmm~h7v^HP*0lJXE}D> zz`^&&!TCKyUE=iIpyxnmb6nQWuBj+*`{Anw#EGKriA#G!7wVhA|L=d`ofGSr60aK* zudCr)4m#U^h|hex_bk5K8Zf#!@k$u(b3ljq_3{<%`)7~$yF2cI^$F}(BXMiuHW};P zx~7_Ha9d|T-u@=@JootAO^mDe0@@M3sEQZMc^*3W8Gc7Olhx=%oMxx=eufSlzsY=m zCG6*}#M${+&whbw z3v{0S?qeaJ9{h#=^NBwxpT6GE!G0Kjz}Zr1F>w+n({rE$2m57k*2Y*)ocrnY9O%F) zE8fr8IdC;`wkf;Gb^#syxmMgaU%PlcapFI)$3N*$=%ha}o}qrz>TV`p-5~w^0i9=m zI`esI#Mw^#Q4RHcna}gP>TfHe?~Hmsiut?Z9T)5*{>#?-`9O|~_zgP@&AEp-RnUKE`Umk?@LcT=5pS`|H<05jbe{d=vIdEDYV|nBh+qDZ zzCA%F=P9h$QQm*{o+MrqOW$9hLw^|~_J!fkpC!)YaC#1OHpdxP_8obVIBOp2+n;yi zmGeD`OWm##|Li-x{Y>vkz^<_0fV4M>m*D@?AGlWx-tn_{hyxxt1l09;Nbo_%9APEbK>-gs4ow6HpkcMC+<}oy!Vng z=hb>ht^=UMx;$=z%PotXIORSUK>P#coBj7&;`TDEkDx=mv7zc75bL`9>_c{aq z0sqfMABn#?s=ofv!H&oe0Vi#}e~A-pSPw#HbH2U9P>u7f^Ti51;m7#rNi|Q(aSA%m z?{2uPob@>lANo{S;wM+@K{>8M7o*p!ly}{ETz=Mzh8*{fGCn-R{F7h# zC)WO#y^rqLp#0;q5{mVI*v65Gf4;2Em*s^{j;rt+{FJD4bmDzcaVa@JK?i?heFM&# z%(00RTu|R`ptCtHYi`suwBPRhG#+t|sP&AjA9TB>H2#QLM+ftcc9(Cg9Bi?kwxde3ZPjRvSSaPEvaZ)|i&r{IBKWLvQ&p7|0#0mVz=1BWN7m|Ol z&I7-|`4Ytc@QdC*(0S%NzZ*DVei`DIQTd?KKc?>nq935V2b-5CUN1wQDs=D<;zHoW ztx%gdWsd3n106Ui#J+6L>w3iL)B2GYbegA+C9OUJoJPlpbh*SEle*A;Z z<~ZZECqK+2&eNCramBlMt&bH>jML{&&n5nuU-k8c&Mv2~)kvJf-1}o8ai)j1IdWWu z4&!R1Bt^@{>NK~Q&2?FC#69}*iI)=hbX=P&`zds?pGFmVAYHeuAYRd|`uai#|6;ue z|DI^HhB!IA$`f+F+t~8)dgAOe#PPg+w-N0C<=oSA6Y;AKwd*VACFnel1J3Uf1Vr6N z{BY_XhV&1k)7U_S{XJnO>cXInT2?W@^EO}l2fF6fq7JFuWr_QpyI}DhfdZpW z5-;8*y?>yCe~yZMz~8yf5@*aaJqJ3B54o%JetGJh3&dHg*6Ff-&~f{7S@*>IVi&hv zAzosYPa%2G$@j(Z-T>@>sPhft&5ohpk3t7~;oc@VrPJRb&by>~4s_t)eHL)8+`3Pk zSXuNO=xmO&-S*z~h&b_8yhDyF-nCoj`mb>RXT;x;Q@{W6Zhh&zhr6lBOX8nU`CW3o zAn)Pg{1)~+^zR$uEL8h=$$<`j!FM*m$$Ikxas1VOU2>pK=J}me`oXtNKS8)0z zS@TeT{TPp&tMbaE-%+lR<`cXpfbzd6AC~wDRKA()f6#G%cAgtAC?B49Usb%&VOJH8 zf`2f-pj;=@MJ7(@9Qu9(9sGm%061^&MI%lll}9P<2c2j8IsG$uUM%8Y%l$L`#yQ^? 
z91xHA6@It*(m$s89{HiL$Nu7ph}R;&-mcKWKRD+HCsC+m#A&U{<=M|p`>oiWk~ozN zd4%4zZzsR;g0#dBrQ(y)|K9O^tW+W%8@E+P;(siswgKjd3R3+=UU(l%TbRw*%#?K(4ie*Ujk0~$(ee;$nP#AA0Bpy+@Te58r{)zpo4##i~Vq-n{A0R#Bkpc zI-BFNE{Qnpmh~Np^RBvnp8=iS>=CDRS$KyQ_3J#eEAif{`+9PofsT2;Ry`4)2os|x z@tRe&%O~R#&>=oC_}#=fGe#G8$0zWe8om>%#hlO$utm04nXK(f6 z0(7Wv*tMMhuzbsS;vCMduP=0{?_3ebZ<%lsaSpV!IdUF=4srYv%CS!u;MWbyiL*buzTZIy4%RL3OYJ9rGJanF%W2}r zilE;YKnK6zH_93P%6a0vPv{ND&+-%B8LRu(W#S}L_X^}V1)XQVxU5a1;!s{s_|Dgf zUo3@PPPv|ej^Ar==Kprgbenh&RDPK(uk;V{e>dJwmg4=I9d7@`5&1ZS{=G;1+iCQ6 zg%19aarn$z|0Yf(0|z>r<7+K1&iBf?cX>*j2WtN(#~)(r(bfZrInNz1?jbk2vX1=cKVrQ^ z#6N5}?}HBY#kv;dESfbLaTXi$2%rN8ad~iteojf8;_5v#X*cL#H;fP9Bs`p!IImTC z{_D7cJa_O%PsvF9A!=PC=M(7oK7{i;OZrHS{q;kyCQfrE1laNgu9N1O|WchaG= zIWDW+M6O?@e=8Danjv2RI(t6+9w#~_;!cAORUuxDHMW1Hy`W>Bj}>0T5pqnaLA>PZ zo{{YT(4pO7odtV6o>-eW2UI?o?ElbV{fzf7e_8m;(+33u-Rm>h75tul^@-n3eP2SB z6FR$`&hJ4?OxuV!?^Sz~>niy@h(%(|HIg-$ zIERbtIsERvGv94grD4Q*px%>|{l>d^l*?L`iSN@qcsi2!vo71^l>UJ(yZRUCKe(?1 z`+aEOPrOsFY@X~t&|y89wMD!?md~iGb0obli@7tfeBa@kEVD z#PhAKpO>M7pAq**x#HfMN}RjuJ8*K`fzIYQq^;U<@Aq`rv4Hpq@9WDi@1dnojtV}!VqniJ>)qo!)-kYi#nwxRU*)sjU(mr{ z81KO;QF;Y&mKpA|K?i@y_3X1xtBJEU{Lk(4HWc7@Mh@28K>S^XJSphxa=NVEBCmTv z$Zv1N;KcroyL)b2XE`JkX(?ovxXe_SerpoF&fR>RzU_6Lhkj6&3Hk z&EI&Dc`Dyc)(<+=ucBDT^yzw)I0X%H9q7QpehhXhlKUocF8S!~1RXfoUxU;9-5ugI zGrYF}oy~F17d7ubAWq)ec6nsG_jbO(-=Ul_Ry-ztPn9?G-}v9&brs`M&A*-#|BSld zDBBBkvc2FwKFYhJ)+^%mRreERdw~xA!9EY1F7e(GCv+LTpP>V1o*2KYT>eO$S%&;Y z=)id@IKi|3CC=np`tm?$b6i&XUpd}4)j#y4ALEi~8`vCaZ|J1GCyDr8{aU{g?~UO- zJ?KzB#OYw~LkS`fCvH`J{h+fs&i+vNUS#6r`^o-*br8zAHE|5$``5F}Dfir?K!N9f1s28!Mp(8 zv+bFP7huS@h7SJ0ya3Ml8`+36UcFZ!$35uaAGx008k~zb_td)^(tgl+wx6H1R`_Ri z?|j7H8%p1Qp!3Xkey^=Z)s?7OoiauB!Y*X;~^2by<_e`%?o7S0~;rLmU}8jw3tghaCldi5FJI2W9)lI>>YT^Raxy zIx|d3bg|FSQ`fx>kC>LT9&=@A2tmnK|C{YwI?|+ulIWgD!e}n~3s7jKtp^%zldvq6XTyhh_msko&y~?^Pcm#l6YSq;#^U28`*B5vpLRukrT@Y5NF~& zz1_Uc7x^Uay{_>cLi`$+ZNBs~bUa=;zsE5n^a$epW_Wi)evbori10)3n$g7hqUJSe zKj>gTtP{bB*<>7X3K;Tjpz}OFImf?I2__OhLu&o}>)rV0>^E~hPa*z3bss3Cev?r- z>VDOoLobG3` zv+!;b_{~4;A%5wF_Bo2=Ll=_0@ooqBOX40NenoX3M%o)XIi4aO2wt{DhltmqsJ>r8 zhyL(RwI7XZ@P4pb-Y>vxnUz69jAL^eX&cIUo`2spwKjed=ep&9FCEg3ec@cEz zr>kpn9Hz*mi^Mr+c*h00v)ybx&dK6EDw8GG79Vv^=4UMz`TDyvUM2pk33k0@yMYep z?DdoZHb;!yB69NY=P-@MINW#t4dQn<YPN*FVMli@}2ZI{hkqLs3EToI@rIIvVW_nrH@XH_Wk=W z$g6_g{+#rZ_+!*Npt79Mp}u%m1)R-I-xBBaIlcX%LwzxSfRiA~N8n zd9I~4ee~gDFQ0q21{y`_l2Yl}i zcGw&zEb&&y((m)3gI^He0q3`4;fQm_@ZAaM;1|qe;5-`|i8u)j?*%~T*)PuXtcG(2=uG}8I5{bC607qhX}|yKpV>W9690m_|1J66`RCjBvP7Qh#QbTAAIe}) z=wMIe`@)`;-e(}r8-qQe0|(!w0>^ht7UK9D?s-E8PF(RFu))9NB+l?^`hEe`+<*hO9yqbGr zU%hc{5#p{^^OY+<_gD-rL{fAsU6ck6QJxZ1o$RpJLt(eu6Qx6U{~)J!#r zU-yun@7;XujKiP0Q=9lfOU7^it$b>_X6 z3_F;(y*Ari*-m8MYkdWtlQJ-Khok22PIp^8jQA-f^ql%DnEO9nj`go-jw5q>Qfj4|JaG z=bWb|Em}nU;p%-i+1|aIr~EAJw^9D`4VMu=vdWK;eCV)G$NR(?8r<#lcc!gDurtOh z@Gli#Mg0CMA3^p5=)l2#8=RY=))8ln`hK9~KnH)}oCTb*I|GOl){q|o-T$w@0{d(s z{ux7lhqwO1IEM1iDH=$8pGx}n1|9rWT#bJTVw||N@?4!C%n9UnP{kGWH!Fu;j{Wx4-VJ=J-wJcxQ;eO}&dJ`vr9H7tUi)&M3cJAkNFW zdcQ-5{)Ki7&dx)ZiDPP?(0R6}kJVY^0bUq=o%k~i`%>uaayq{k7{1zV;ykKj*H_L< zrtbx|7W=)@Ngon#te@U~(7_Klr-S_(ZG23e%B%G2FX+I*JOxf*tLMZicudcM4xDJ~ zpHkk#)M&4WQ`m664xP>Mvyf+w^0eCamN*Yz>em_2h14!4i#W{VRv(G~$#6d!I@C9w zICmMJ_+R3DHLNe70|$0QeLtNIHRZ>+>T1I|Ep*@{7xr&AE-Z0;4d0K24)$Nu^zT-7o{%Rp@%yRo$;kc$9k26!tnFf4D;706@l1Kl z(7}F5M86z=EH-g+sP`e{_y`^BcT{}ey;+(B#A$I<-+rOfq0Lnc7r6uuj{Yx zuh7}$bXnVGU(NlapQmR_L7WsY_51Q=gx4? 
zUx+KX+vU*o#GlwmUvKE(XS~A>&aqgTiPLD#)H&EldMCE~|3%q!5@cJ{UQia7eO8>a7dpF~ zE^DB8AHe5ved3H(?@Y=03Oc)ZIG6bLo_+$(UrN2N5%D@1c+j;er{xEqb5o&7&W_zlXdA*sWW}R>V7DXqV8zFEy24mL7Z%q4@OE?r|LF zx8V1>)t31ER9>ANN1#KyjJT2aGh1eKB+lKOcD-f0gbwZUbhl3n!py+C11{_2j7u~x z91SS$GE&_8fHWPvQru`#`e1&?z@r zwq1QJ?5n{WF~1-2Vypd*^bd64AkGQSiZO$TQ_`@{fzIamS*^u-YGjRJ#A&MPC+!Da zNaHZ#pD5?>1fz%_N8J~beCR^rqhEku@ryt4OQ`*_e9~{z6;^oRd!`6X&_%d>T4%uzv<8-ht`F8ETk!po8BLHvwnhfZ4=} zXUM;WE+oGsiZ-40IiH%$Cw?OpCzO7HPQK^$`LEv1qt5yMo>yCu_mwlnV&W%GtKa8C z2YVv#8FnZVcR6vM8seGInd~|I>?-1vNupmbd23Ist5D8j{_BXpT-|4u_Jq!}zg*S` zv2SdAV$XK+3)5azPI2yxsUkE)x0j}3Fxe6dH&a5Q13fYx&OQwc93{u4S844 zfrI@S>YcdzQR2ihyw3+6I5@Wf$EV{-;@mcr2Rd+Ki*@tL3}=ZGV0iBWI&knkYm{f> zjSIw?X?Rx@I-BEb-S5tMU0-yzORl{Y2V3($d+Ox&BQc>X?d@~iJ=NDg#1$NBF2p=pnZGfTyN zWjoiu`~EGiA@YwSjd@1At!h0a`z3TT|9IWR>aiZjEaCP8>;U^ktnrfgrws2TKnMHb z`+?we%Jr5weui@t=wQEKvEO<2?gMcqmC}#1(Anec_x%p?vrw+nTfY!*w&C3Z=$Pls z_X*4xcIuDu(ka#T^O(2u49=5L-j;*I6MtkSdtQ+32Rb>=AZ`KP{j8CR7cYi>o`DYj z#l94r#w(%`=W;4N2Rit7thjeND03|0#5Q~v0XmtFgLN>b0f>hIA&7m`2g zzvTHWX}bKxUzXJ7%W)n$loRLFuwS1}g@|)8j=r4GVPA#(p0F$X?08vdqI+Kjzk}aB zpeXU9Hq@6BI<%Ml;+*J1`%1*gJ44Tb4xB{deV-ezsu1T!G5vQgpo87e?@+Hh>1z7`UN`A z{q9>FMchYRP^J~}*M_pk2g!%dGvArVI(}DM;@8WgAJ0sAthiSXdt@%vk$CNz>&G+b z;Gg`W|IK{gg*Xq#>N(JXgM2%bC*AlS#K|{D&w&mcod19`snnmunZHBNfzH%kqNeUg zoLPo*MsM2-&gD?fj`0T(e`R~SoU*+@hw-(%>UbT~Ca*ED;rDy}SigebXvI+CmsRJ5 zavp#V_C!Adr&hj^#JSa1Z%^oKj?22)o9EB?$How6t9s{HjxW%K^nS+9R^y4+c%R;W z-tFgotXv|Gp<&iZ#NTXqR}ebb5B7xpN?(~uoZ*J|44?yNl2~7*88(wRl|JhI0G-Wo z-b3!5cP?>CM$xbDy}Re*Joj!NX(92?s60vOXZ^XibA8(Kz!Kv1SMMFlb}ZMY$d7^_ z*7jXNoLz>vDRl4y?u~*IslXcI+%vpy1ReZmsv1v8}VC}u=&z2(7~RtFYIu)%TD6>8}`-EfrE7eIE8BNArA5f zWqF{pIlflw3%~x8@rV4J8SxJgC!V@TDA!leh5EmTjlTVpEcG9h&!1JSFZ*0NMBF^; zyT7u0(Anj4e&6-!jAO(pq2BS6{mWm)cY9}Rns~~+6+2tp#uls zV}kvYS3XCaIfi!%p|d&8`zO0fUn0&f)gI)$=xrPwe}{5bjBt(k>(x2Tf8#s-GxW$! 
z;%_jVZ^A!z>#^MDg*dN3dDD)*OT5Ts_5BPw_y_loz=@vVF>w|f&at4gIp5xGJjmZ0 zEVu1BaTfKk=ONktptI|T_?7csMfU`+iMKVGejdj@%kwzsW1SUwswd{XBYwHt`uziR zXqVH=@xHoS=}*Mz5=mc9=;V3UwdRAG=3f5&JPY$Z{1ao(zr@dwQa`^#2S4LpFF50V z4?XS2_+4Sc_wArVeX&jgXG8d3i8Ip>cY@BdU!3Rmzs(L${MUweC%of3*9{|cMJB$# zmvOC6Vt*5KDjIQ$7~)#c!7r%rMvZd{yVJa=e3%$2*sW_#(;`c1{-JbyVv|$%Br0zE(Mr zKX$cu4&psi=RfFTnepb<_KRD|z7G{oGugP`{oz3yHwu-oHqG`VqXRlhn$aMj9 zp2s_vl~d%A{nNh`@rSwWd0hG(x;uGZU4P=vBXe1n_>Rezl4Xh4NZoIeJm_S!}mN+ z*DSu@FlX=};x_1S*I$l5(8)YbtmjeRWbua)Z}B3%J)sM!Jsq#X=Noa}k0gEq)!?N6MnhVx12z(KzRXK~XB#0kAu-@c)(m`9u_;r07u=)l4H59O(Pe<5+& zCD3!A1IJH%7xhliQsN|4c|~#_gbx12ccoFDd3{zAr@0|d6S|Q41?&j^;e2a}KTgfF zvYgPNzQ`*E=SaB#;*?kKeM$~=;NV;coHD7l5ND^l$1ge1!EOV^dT#RN?ZgQm+4iHH zpP=(>H)o#Agt0-y&#&^&B;ULEt;<^Z8}F-jmDx-DT&n%Z@fA9`zC#=bcFmn4n0S2* z`DoC=Ki9;3a_Qz_;uJIZ89Mj}c0hTK%{oq;XNLGRbe{d=vOb9Orib-U6MvG5w@Uj# z$LmLz_2d)#W9R7$#2agfyF$nQaNg%D@b?wsEmrSV%Jw7A|NIm<`K|K)y*>rF*AG~) z!MpAh`4%L?*I;`)LDt{%~mo3_FT?}E*|_N~ahi40bD)F2__5!G1M2jko<*x0Lppw#uwsmU8=Vybe{R& z?tzJX_5*!GP5&_tTKS;Om;E0)?yo);)^8|prqW@F*Yu9wuF%0hxK|ENiGRWoC#!k~ zTF!6KfrIbrfm3BxB;x#OxGxHw&2d?~O7OXS-Ad7jQ%~I&m3D$|*Ocb}+oz*kb>i|k znIK0j;tf#uMrHk=0|)g*{mMLzOPuL(_3JR`z`=SMoQjJQ5+`*^JqJ2)cHRC;?=LiM zn3OnO4EIE!vpIei))y$x;X)~iGsEz1Idq=ey>mSs=HE2Lf2!_rNc+q69L58bvp`@5 z;$$wOw;Ob@8-9b6=y(?5v^Si4LT7VaRne0&XFMf`5APu_Yo0CpD?}}aZatWInv(H+4lZ^ zZismX^{e`}Ch@{t(Z}1M^W2YIRyGk|?b5ss@k7Va&mYjq_JQ_{@^*{UfOs#<>E{pV z;E#>sUdivr8WHE1;X69e*&OG&>)lSxh!eM!T^_mq^maXld?D2LMden+Z&5(chb|=l zEQ-MWt>Blo#6O->?;q%-e~OFuNjeSaNW7?}_5Ohl{=s~YdXEX;l{oD>=sD1V;}YMS zsy4j`aY9egbD#qU??IwGt8)HHoZDOV9O!J0uXR@3lRAE`FL5HOcTZ&;5jw;X|F~Xm z=^ssttGr_$3+Fi~-{3w2iQ7=UUn|Ec=ul6@U%<)Sd?;~d8r~g(4)rXMhU0`WqKqd_ zsZX}OWIdrnJr4&g-16-WWcPU*#vPPz?$e3HeY5`O#w&cc4E$efOeOxN75ecNI(hE| z`%LiCZJ$BB`gir^g%10je5#{u^lOxBVdF#Y{SN#GewWO1h`%w3zFk5Gzi$=e;h>}Q ziQ`*U&w3hjlajaVP0I;=Nd@wlAQK#Xm-zy@v16KnK6${T*;}emF^- z3{CWYfi5J!WBmgDz2Rqxe@(5Qq&=bY%y;IEPfKu-_$Lhc@*(AoR}Z{GoQHGueqnq1 zSon?l#*2QFIPtf6$H964oK;`%5NE!jUeMY8b>_d!nD~G=VLRI8k>i#r|0O`&hw78& zG4bxJ`zLa~h7R-f;vxwOHA?4l&)4WbsBhZW&xjk>5TAt(^ItX9?-p(F|8TxgOn1LS zUOxEQ|9VOM=^ONZfe!r+`ODyZ%=wl$jgRX&(4pVS^+~78ABfXZ#ieAwfX;Kja9Ppk z@_F&bMPGWpX(D8eC-{Jvce^%Ky)QlhFd#OU}_wUfj`48_!z>cLNge6{+82Wez zbnpkpMQ}O>g(J>qLmU7)aB!~>obZ1{BF^3H`tm>rzam}=&a9fzh|^fbaim|N^XykY ztE7mF4~ZL#_#@T1lH@~&ev(Hy#INUv6)UsOa`zMLuTcIB=i?GTvLT-cI@l9&a&XQ} zN=Tf&hCFKMz`=W-;B1fIw^_M+K^udT}XaG9v=9~5~d~o zAIfghFVMj+S+H)XAHQO#TT6|8`S>;i@oO9GDgA%vjW7Asopb^^CEO|JVRa{ z$~*o`Y2rOLj33aUy(1n7PTDu+h_mRRy$_V_9XgxivM!2zp{so>5vSQE{r(g>9{-(r zFYzK&C0>D_ivKy+@4x+3gZQ()>g&zxcOMJ$HS9MlZ*AfnHRSC<2R~rG2Itc8b?IIP z^~8D+^{iUA1@X43yfWF|ptCuCR*;yNMwV_voK=hMa>@P*9mc<^s$*SiA9P{(p1tnz z4|yvn=c1S$h~IIV-XGARzJY5v-W0s0GjUR?b8%T;=ultme^8! 
zj#tpJe^|*3c+GN^8CB!uu3-PNZwtTS)eVCG$YUP?9scD$JDiq3Y!RGqJnk8y!@t|= zT@&lU%7uOvoR}VYK^9c*Swz(2wq-~{3LYPo&6j*6@C!IDD58PSdC-1m|s3d;dh|axCW` z<<1Ju`Q`TbH>~F$#QBsnZQ(x!zkEhJA6>}fsQ-CD{@*SO{*y#@e%Q_fe9lLz9i1(H zUGRIE{Z!qK(CK!>e26`k?7Jm++0A!iwg1uKf7%tCLv`;6&U}yW)1!k!`~l}>?gxT% z+~XZ}bhIP<1!rZXXMz)Vh20P6LiV%&{$KieF9m;JM3=AaruYAdBPeIR&)x`5PLF(P zbl8o3Za6Wzhrjy%KH|KJ_VS>^Zsfbd$?|g~!O0xQZZ~wH*=@#xJ7Ro}oh+*0e{;)z zUV_doXQ0zpy|30{Tnxea&3unf&zI-!RcVW1E<>;I=N2vIk6|n{f+|za|zyflNYCX=yV=6@1Ro8##!?U z-tD`ty)+Mf#4Q3`_j4} zpridVFW|@U%Nh&Lf)w_8qQm|PRG#yK7R?1`qA8DVU+CQS#eFEBLq0X-8kVND;Egry zLi5l`UZAs4z0Y|sMmxbfV$S2VAJE~4g}YwQ>Df0~@Oqg0QIvD?9~}kvzB%92>tS@P zhsm36m~}y?nOipo`}vB>GdnfBtKj!B`zYGa=iVJ!dA|2KaU05UYDu^Lf?vYq@oT@MTQ#If?k7unn!I-Z?7 z#BtlV=6QWjf%%vE_jx;1@NRgV>!HIRp_!e74!?8$2j}l08wBU;=`Kh6 z9bIUCCtnf%(<@s9ztbK&A6>|N|MhOIUw;$)i6iWG(Cc0H{V3=B#CrtiWClA29e%;z zaFSKnFF5zRy5D`!<19KjjF)g)pFb!#tE<>K=)C-!yZBMTncc?z9g48|H^8~0;?~vE zPY8a$nf7v`bIa*-dZ~9X@?SYEI6Ka{96i3G({Uy1OYGMq_j$pKmfr49boe2oI?voS z@uJ|2ZEfeEgM(iv&+Qyn1!wkbI|m(hx~SsQihth_oI=}Oj_S@}=4twf#UH>K#1?RiNb`Cl?tPd#9z6MbQ=i+5M2OWN4{}fK^Xwd~{ zg2{8xenA)VJmhm&|G}@YB9`E{_P95WPWDUxdF|q%c!C%IxxL=n54;~ox%w?iBsi6h z*g5F%19=s2Do#r(I1@eE1v>mdJA^Z&eG0+pu*oft_5-?*{ovm}N+(S%_;JiW)_>!_ zdpG)(Tvt@sl1}jJcziDuot~$#5B8|oG?U<^H1|98xPuP=ur7cT5GR}9Z1gw}L!JB3ByF#-&=S|qF&WU1zGuI=Z1s!(hUOt@MKb8`lk|wW6 zmj@jj-a~-1yK{ixT=K{lMu-2f8=QFgD+*4`g7)<;I&E*_TXLk9A}%{PwTOzw&2wExL<@0PXA)V#Qp46J?je2ERTIhblCeqEP2m6 zcA*A>Q^ojAw{LXX-n?UjopL8`EO-Me*zJUl`Y}GhxpStO;GFTulR-!Qrm1|&Qd3(A zP8D-+PS+2etY46mLw#rVN`tn7H@1dbKRwQ)3wa*#e>X9Gx{iWB!{dEa-c6L{4RDgF z^M+h;x(eRs9`|t3;Scf~@JHX%-36zC$NR77@JEUs>BV_Nzv;aNXOGD*)BZrWZNmHC zKL~JQseF{cR{aDorpYtY;}<&0#rF`XU%u}K2~Gr$_x8|9y9YVE`vLFu)1iWw)a0w{ zc7l%eM`;5N7y=DflkJ=K4-c5K1`;C(*$qu5VwAshfex6 zz*(j4C*>;nv*2wp@8IitqQhRqQTXrVrP+eh&%^HMuvZ!Nj@!e*^8{y&NBob@t>3@r z7JRRVa@EbXNbnkYy!V1m@&cV=>ino@`elMQIfgr4>T?TpoLfw}a_Z2t=VgP>Ey%~C zoPVcXCAdjF-V;Pe|MD3JSD(GF=kD$Q-rHt<4gW#0b%NjGs=Z&J!_S)+%Xsu=?@fZU zeVd(w&gBF-6MmQ9kE_3No8TNa?O2Z|=t7%!=Bj&c6T0sZ{3W&Q{REvnHxF=GDT3_vMu+`)7X&|47`sn!h7YuJ(81xmGH`O&Iw&|7=Gi&uT#nE2#gOyY%|}NB zXP|j!NRKDz^mvj$ozs?|c|!0KdaQ5Jx#bFUuB-1cHEVfBaC%*E>!@TXLx=s@uZDBw_9el2X7W(=xQPz?&zK|cH`bqcO>lmC zxaVI*)yRS?G>|XC;!a*ta1;-QYKf}*@^ReK!cx?AGI{e%> zy!>8M@3qeb=TUgKoZ8RmT#o;KO4p(P2u?GThpWfC|GHmuxuSFZ{r$#mCV%F?@@J@b zDym0~DEJG_JtECVhrgJgCUx)o(~~HxgZH2L-ZlOz`6#mB-%DV(Gdldmdx(Y5`Dv8O>)KQyp5Xf;x%Jik6rFyj;(}WD_WL-I z;8pavCyNgIomBICxQ9sur%NikAJD;xrsD9fr;-a!{)Bc8I+x>fQmgoRa!ro* zZ~h7AEwsz%eXBKXH<*zKv$2lA-)zB6M9!8tw6&OwJ?*k7Z*1s{|a zoa3YI9CTiOIb5K;;PfB&A$Ie>2a!BxWx;<|-_8%~J&3959DBg-s)8TwE0?ePJv!5U z951_u>swRsBDV~SUH!j5+ooV0!RgT@Y#iQw#m{rIe`129r0e;#U&%JOaDV~-q#cLo_+i_EhN{}SNjv4 z_UC1_4|p_78^P-~%x*t)vdxaHFEH##_+@4y*5X{g{VF!>&ugU-v(nbM39oHMWOc^LoI&pW4#7W`|eKFrU& z^MM`SmK!Vh?W((c-4D@eKeH}|ck9Fi!E4&o7mtv zU;DaSPR&Oba{T9W7OHV?!JPGif8Y;$dqJoB1K-cUuG=ea7QA^L`(Nnr5Bm^set)=4 zaKd@)Q=#+n&v(;z2u=o%_h!O2UIjSYJ})57zjCzOBl!7Z+ShsLs4w@rsBf(_`voVR z2L~M-&P(C!_MH)&kCWNUgANXPPjCVfUl5$^9(ji7T#nD#-&x*ATGjKC;PjklpI6a^ zd)L7J|9|j$sd(?_3s(iN-xWI#o!eeH9|>@7FO}!_Q`_AXyqziR;{ZChT>js)xq0`t z;PfeO@2C3SMK#l*Hg;Q(XV3PP!TWi6CHl-qTA?g)<-b=wr6lk{_y3oeA_G)}Pwd9T9FE@76^F2D_ zj^K3^{e<%VRW$sK_xBmsnfO7^hv={$^9Y>OuRju;LmulWbYAvz=6ozTN51_K`}xmr zAK#8E_>Dej9UbVL-zWQD&c721{;76uf7a_QbgZ}9?fv}9w=o9>ueXk?_0~`Ak_c{; zFYV(5I_-DjEc})D&t!u4RX>-f{f-X5vrhr%c7e|Xr}rv52OWNAeu9%Jd|JVI^VsF+ zxBy+q{l?$#L&s$h{8TSKgzvxa)F(q0!B6{%eH_y9Am91M4r6v_7o0zm*g5F%3+p8~ znVaSooDrYfIq2Y!zX+$r&AftBql%q_&gJ-=(`ufZ+M%G}bnNbO^tgpi&U1lI0rmb) zs*j2YUT?F{r~4T?`q`6d8TZVI5Z&~%0OvwYc^;nd=i-8UX1l$<=&&329I)HN#-#;k 
zs5$S{{R^GT@qY(@XN+=!v(vmEtm~=Y!Cx8v)a7{@cn3ewSy4j1w>xcpMZw)|;z-R! zHzV@6|J~>PJxbXfOQN4I?3UO6{^GtCKEd5*;y&Gv!*+ks|GmYvMQRBC6LYUm^L1WD zB{LvC9s0aXfgW3e;}XU<{PyS5+Jc`axjo)QhyVXnaY@k9x`I>3<6Ss(*mI$Jrz~9S z27=Q%zx{jzU1;MX=UdeG`0gfxzrUoNk1k|>fKyB5S+sl5Lh$dH^}Fr|=&(T#*vo?s&K{MIR-yU; z!5K5#&OwL0xpz)^O5PtVIJGys9BprOa{lnY|6cvaVS?A>^#6qy=qytE?6bcZDR_Aw zx;#CeqGLW^TWD6n@~;04~6T`5&W1Y?^U-`bb4H1Ucs)Fo6i@#Tps&_=dS}Uo1H74!HH!eny9Xh_~Tn-mpw?YG1V54_#>f;d}%BuqLYnf7&xUA6>|N|Mv>U z=36KDEj->o@OrO+{07R~{qK!}m)YdeXuG1rKg92FcFx!;IB}EM;|_E#$L9pClHW^7 z*z-5RX`9jI==O(B=Arnnn{wCKEqK*D-pvWCzlqDJcf8p91pl~sA4T^^bc}DGnvRqB zRhFhF_N?|C-<;)V1V5@r9Hz%Nz7tD1Q?|PxI1gvp*SYBMb9gn+k2-QmaLz5VbI^J9 z!}49P3C^JH#dFoRPfT7b7$R-!xl&T^BWF( zDfoZPvX6^l<#VqCyPi$|M)1S^YUiV)A677qU$Zw--jDAr+v?@F^4G%Me1D(u>iMwv zEx=)af^xQ4{*mCkzG3H}qdb>y#D6BEInVL!+!K9I2SV~7M$cB_XN?w;T=#o zt%5!koai3+JJEUhAxhPhf^*Ew>$?Ai)emFUc=%bqG=l$=iA%K~&}lz#{y@Dq-bycc zQ%szr{eTYpk>?6${ie);dAsa_)7fJkfDS(pCsUqesdEcX4YLl= zen1y;{O12Xtp;cE3Vs#yo{_GvK3C-a1?Bv1WkJD-@A3T-bl8pj7C6V+7ZIG{+3j%~ zI_yR}g!3p%3Bh^faUTy|$aeD|r`8UyDEKi;+xL0G#`ifRRsU#I#V7a|%=hqg|3Ihv z2k$^)4`)|(!8>T)J=K0jhkqDv;C$02P;h$YwvY4Z;Lz{jJgZtyaMGW%$1mtyj?YP? z@>%AM4-%Y8o9uBFI+@QB=ydv6-jDt@cN4+OIn3qheFt_53 z$9XwA?8g2ZoCT>{3(m6LcDtd2(@O0l4I9~3a0Yv{2Xxp!FtNP<5frJT;M6VXwg=r# z(Y1-cr0k^@1Gc$5|NVs43%dwjjMnz~Kdj>&@6uzxK3{Yf{6zij?H3*O<~?yZmk;+6 zobhHKPS+cqZXe{gz?J>J>S@gCo;z@C|7 ztQVX&9&s8v{8C=cLo@eo5}W}Z@0y^4lY3hRk!P24$Tq=gUcw$XqQk$8U(_o~s~v)q z+q@s5{fjQNaqmlYepfEr9>Gs;&Ik4Qf{yxcBAJS{kt z*4g6;bl8o2QaG!AI43wY7rPv7H*_J}%|G6bl;on|=e}m=hs`hk@34#*ens$)MzhB; zUhlAwzl}ZiM!q38{{H(e{^MVh=}!drqm?dK zx1+F)fA79~r}lq}2E7pc=lkq-K&Rsc@+a}n@hGnaZ@Wj{Iy(GJd=4je#BjIX-_NV! zaV~`p4*O|v240LPIML00K;2%@xg7sEeB`1ig7eUvZ)rb-H4bM!p`5pCL>K&kL#`im zInhyH>JR5cuGoST)8xNv4mvo*3vh<~8BcK1{An)_I+qjZ{H*%Lu$74fC%V~(()|M6 z+85*2T}%GcR^2an|C;hOZJbnat30vy3v?lmuRe$GSHN#L>@&fC>T%y1o$eQHR2&z&wdYL)Iw#dT{~b5y7Q8>FxIEon&^0lJzrHHQ;}&lU1?RV3Q~9lF>*N#M_8$2i z=(InH8}LKwR|N&H*crPY(9u5z9g*kxt9~ycI6ptK*B2f29rG~nitiKt7JMFZOua`n zvRetk&Ft}>1v=WvCdHYTzKr0!^w_sWM>{#I?4R;nIl;+l-UZR^1f8}&{Q~=^99v27 zzR6&Z_t0Vga_ajG%_~)<7&)bz2eJQxfK4@L!bAFwE zPOO8!+1^0#e|hf4f7+ht8038t(G+JcN$+$8R_{{%IpPmH)E8bB>PoH*37?XH^@t7o4X5*xxxv=e9rMJ^%Q7 z%dedTuWp_XiNF2hjqrW?2!8*zc79mn4e}qcYpciK3I2^zcDtg(Ka6K^_U8CraK0`Z z{ePy@entm}^9nd!vW*m+VI%AubS@{*iM(3+dCQ_Pf^%@X%hCNA9sRk)mN8FyZu_@C zvu>h%h0l!@+&_MExjOzs7jnGe@9$^{CJO$N_^v-RKdk4}T!vr|%K9>GFn`)WhHCSyg7dq<4 zJ_Y3(aA2k2MA~NOpo2qR7@YaT*9uOIqjnBDIOJ!-iJE$&;LJN~=b&>r{`o5@H*XP~ zogVjZ!kWLrdYW=}|LRx4PoB_?({%qt7h3=CrQQK*Qf`;vXUT5oqa!~indxA0nl4Ll zZRh&nJhkC!{jvS&Ucrx;${w$x!(WWs*kRd@1A=qIBR>iq{=zPBB3?TzIJxuN>x(Wl zd(Pe~-;4Zy)p5aZ7SGN{7c$>JUV1v_vPWZLWI_?+9L<$IB3_uUqpH|t%Fwi7z} z-m(8YS*YDT!OIrY<>`JHwt3P&uIqC3q2NzSZRh{jc|Jnjr-GlXnw=lE@&`DH)qZ;N z%P$0fiizWOJ3@#5xo3g@mw)|Qa8~`|mQ!=k!Qq|-oZPRSzuw=+OZvN=gANYw=fG+D zLqx%ub=%HC=W_hlWqDde5u6cG?c+jN#~s=M<(yJ3y5J{xY4kjEYWd!TD?#}fR1 zJihy@^Cs92r<`poveAG>zY{zZph7}wz}znxHUe$8p;pu;b;Z#YN3O(Hngg6w`l z7qZ;~ozf>GJ;>b2ap2D`kX-N^_jCDrU4Sm!|9@2B-Jg_cyTA9HQJ?d-`Yy@bH=hY^ zUUP3uk8kK)D(^J}IlNa&Iqy_YCwN_dwA%q4aX>=@P8gJWLEQm+gW~}58sM)=l}Yf6 zn)d^=-_g1L@;MO_$UK+DXR``UO~cXkMz`g{oq97;6%M|KME{~(pZu6p@VYm#*Bf1E z{$HT(qh6|*NAUZ1u=^h!_fao4nfYkXqzjvA|Ez~8clH%U1h-Oem#giG4i4icoYp}l z1?QvM_VEoJ9Nt@mv$$?q!3lb9Ur(WfLwpTq{hsoI6YH9tgU;pn&!|6MBJ;BXA&b6|NR~Y>uUHt%6Ad` zI6u35Jr1G6PxuW^iWA)gXVwlo2OXTxR@@Qym!ID4B{+j_+BxX(_deCXm#^q6IHOH| zr|u`{LiV?Re%9IT0|kGR$ND6!`B^;&3=zD<)9m$@=cWM;>nQw?uh4M8DeaLTiw-|T zQRjR&(v23JUd!y|LFaP(-whpq^+&-uVD6{s`QH9+Xppl}y*m(Z%LKuz;*lSNE@Z#@ zzb_TL*A&6uW8Try?MA;ZRocW+AN`&4hkYY<1jkY1)PBmLygvzkq7`<#qQgIY9|%9+ 
zc|22arki|i-EYz1pTX+;6{S}HA~-8N?$e=@->>kWx8rwPAbAJe`f2-xZ5`uthDMe9 znRS~l5&W*lT)u8k=w#d+;ILlAj!m<#5WFb6?EXN9Kj?>Wjy_l|IB7lBtLX5@ma+02 zqS@;8g7e8?d;QS4_2azBKd-<1-OYm6>!h7$&+GR&e7A&p_IH6*xyb+^Zo^CJbnBTG*$8O(!zVqK({{7x8^9%ecuYVW(N@ZNW?kDK*FY8x0zMTgJ zXI?lv2OXS#dF1;XV^$m$oG5ec@eDdR+12=b{^d!*S!d2ubiL5g-m-lq`%}`~=L9Eq zYqvbQKcOqR@J`8>cVY#{EBFOF4Jv<8@Xi%?d7=5UnDJ-D{fV9?$lEG-{A{Df&sz(w z2!6V>cKe~je*C7KiL>1hoJ)1X!tvi1iaYKv!I{_BUf-~-KYh+Vm2dbc(p|y7y~^cl z|DcoOL4cD(y)(OX-~+)+=W%X^&b42Vvq7CZw2AdZaI(d)_gi%I+ib?qm!EtwCvEvP z!G7kwDg3iz_H)61n%&;t(BWt1O*j{d{UbO@D%m;cs4w#qoLLo}+wbp3W~peeFS^j; zj`k{E9q}N7;77}4=c5aazoM4x=et%$7W^`0?R<2{y107QDRa|D>wIQi%HV#_eQWIV zXRT<0pQN&#kB;_G+q8!t>eYLYX-UtmUVfbOGN$0S^*CRZehhF}r&7+=v*QZR=pJr4 z^|*jg0o?Sy}sx|^B3Q}gr6>Q62b2_-OfiBGT-NHR`(*GZcQfm!_9jS zdR#$g+~nXN-b0|gQF?tQc&$D1-_gM#F9Xidi5UbZ@K?Kk(81weIGp>}vkT7g8}{)X z9e!S>-u=n=eQv=?5zjupqYKRsyuV91<8I6=_>atdru~3UzS|q%e5Bs-C~&uc;EnHQ zuQxjCJz9vFz;yoy+k#vDJ5y+OH@pI0?-7rTu_T#_0xwo>fP1Mw;&)ggidK``(~h*JdyMmEdGb;_k2O`k|9~j{b38 z;X6Tsw=ma-oHO{}C(AaiiQq3PYd;UrdHkG@;fF83Z6P?5JJIC zt!%dgI_yatLOFjN*i&$Le_OX}ba046;8ZE}t>El3_0=47F6Z6-9hKMG=zf2}iPzAr zm+l|v-2VUXKErCYZ;|EDV8N@=&dx)pd4ENd=Y&;83=_PY&FwsNiKf4}GC1+yX1)n> z7O45`T);@d+iuSBbUQ@Hd{)H_d?||@9#Wz1jNtiZlY;A|Xa=b&>rK@Rs`;QUj5n&6b4;BvIx(1qN;d`@$f z7dj!%&w_t!n#x~Zk@y-UE1yAM)PDAq@DeX^m*pG1r z&hE8~1gF3NyZz9GY(M|;_0-m7g1^jspGx~DY~yQyQ%&UsU0bsQ+E=+&ZJXe9FJo_a=;U}6;PCDf z<%(Tmhv2R7m?zLtKkm`MdHsC1;EeUS?~2al1UbG2av%BGf_;Ls!F+#0w+nQPcX3Sz z8`-tSv(jxg7p0u6wTuM>xYi| zan6UG4raS9IAhFrEOhGhS>ds4*&e9@~gLBjU_m{JifPz4*&F5`%%#s#1ot}9`P8uko^ z;PI|KI^*8p{mT0)&aS&CwcsuAn7`2BSH?x!M}cnX1!q!8d;36#U%5{L=T5fFf^(v@ z%hBx^U1)wKK7e28YBs^o?y+t}hhI=e=&X|lP1ZQ(4yFbx|=Fc^` zAB#Mx{E5m4e&h=Fa-zeZ+$*7+)sL1FoIgxFt@|lD?a!Uc4{-)p61@B#`&j7kLmSmU z&lRXDIN#T@*AE?j;Jk(U)p%G#aK@VNhU$KWE@VIW?+e~aP)G1@co~9qo_z9N?sV)m(5Q zcDEu%3!UaAXduVCG;7)kUVF11&^&Ynv(&Hj&88B; z`|g?5Ja)fpC&4S*+Fm|%+Mn1H`vo-oM)1DuWcMdJ{E2_zt!m#r z%I55JHd->@@h5D-aZ1HiRxX7(^b9~ybBrZ=Q8N9SDkpW zU6lBBq~Po;VCSHN!~QOIZq^^hyC)kT`73aJ?@jD zbITRzL|6AHQp8y+I4jLLo<9FXNBgPRrd`$fpAbek8g1AhxXnGz^T=<*zZH#t zr>!s7KFO-N!RtoyI;sD>=34~+ceB5!`!zcJ%lQDDa?O7goTPQ^{zd0<{P&5kezr?+ z7L~BK=di`2{`?F__X_^@4~lnv&Y;_JynWg355a%>m0Nz@KGDf}Gsuaq_I2ugbwu#W znR|@7f1|_yr3|=d#ixIicgSY<6FU5j|KVK7cwcZ9cR;>wrAMJ6QA>o+CO?;?v>y#Xz!L^`yZX`?*R_)?_<|Jx!($2zg~9#pu<1p4Z+Fs zDEyuG_ebA&oQI)vIX>s^F1df+d1)lUIb!yGb^Xw}^<&-R?~i{DMisp39_Nf<^GARa zBZJIapFAj*;HRo@w;wv{-Aeg&Yu9*!v)-}$6`l4g;~f6TnIw_mow;w1d(q)n)kY^U(FpH~GE$@Bt2S59OQHEvw*t>+wD%I{e7} zCO8!;mq?)YC7xC*Cb%ye*z1c<_e!Ncx{^R4#?sByF64OEf8Y0? 
ztIY(zaZbBE!x~R7RQ|vIV=KXrY~ny2AE49z=iUu=U0=Gb;C1rZ%Zm>G^FGM*rDK*P zxbW}$Ak3%mTa@i6_;rKqenW@<+1G+oA@+|l>Pea4|9=FQAH zc=m?iakh&(7aRI^ir`1@Ywu_1up{eE`dRs_KM7724}YL@IsW5dw4Y}QPI`}ZVA#e% z|99Iee>qq1>zQ(DdxmYF*XOK=Ec@NDgbM|~jmJ6+U1hUFlcc#DHKf2$s?!-Uc zH?NSK@9i9Pu7BR`>#Dq}g8kMAPBYV9^f-vlt>3@v%zY~VWKZ!8g7@0wCu@J8`@hbs z#UgAG{4yqQS@Xlzo&ufYm8HK^@7XT+iB0~D?)T`58HYzpv}5k@hHJOdpU5A^U)4wM z6x@s+c{AwX&<@~qt+`ilde5}`9Uc9Wbv>NX#SREg{kbkj+Yw#Je)2h~zLwuDS@zXo z!7nq}RBh>e(N~QvjrMAE7Ys;^*>4{S&YltWc8AX8_^&UkU3n=uH_UqNzxKCeQ)Pc!Fz~J5*O+NP zpF=16n}0l;u4jb1@9$$4tYSZ(4O=`L-~_091AU%E68sH??fY`*@DFi4{#o5Rs^AQ4 zY3HDW!+TY5qW={`a5_%3bI`#V8D8E?iq<)f;57Wr&Ot|e!H$%tS>yzQliR#=rP~X- z(E8s+)xWd(J`wz*9_QoeLgxFNh3XxiLE(}Kehc#+xE_zt>9}dNI>(MTFQwo;J!rQp zI{ZVNi(M17NFz7}&G*gpxPT5$1GUa*79oS+biQLR4?36QbLPjD?-=);ky&uwMsdd% zZ6|cw>WBPYuV*c@3*O$;_Hi|A@saup^V^6GW$U~ZbipD+M<8%h6&q!x{dmCzDfDk zttcnBHLKX`kB)tzD#qUt9`$*XXXn578|YW?UpA{G_zgYwh0uBVJ4f28f)mT*+)ewN z_Z%rt?tC=`=ZZ(ZCc4o4ok`svIuNUl;9oH3u6q1I=hoMMUAkvyJ;6C;_HXpK;k7R1 z{u}kKz9dNS#(Ctsqr(rZSK!3#)AG)oJ?`P}58>iVH`n;YxW z0H>6CC-i!pHiEaQk-eXxqg>oGp?=ZMwiBElfi6dv3!Thg403pP72c~KItktv#_oEa zM@Rq4Zu(cuO*waDK0ZEpKM4Q8e_8Gu!GC7XgS0=;;aB$I;nYaqQ*d&7oU@?AereUZ zFw(U?f^)?5Q*A$VI!RW*<O?f?s%$y`1RWa{3&;y9ehR-!#E#*4yQ1f1=a= zWSs{uUhJO*Z}LEUxzOPU_QT=KKRjD->Uyj*(cuT;e>khh%@dqY$JzaWF690Y;BY?# z{@O1W3H~JW9Yx*l(BX%X>ip_?xn+X$0aaO|^5-;ZNR)f|I53R>280`+d4!p$plc{`>RYGyf*| z!#(x^!!~{fIe{vVa`cm3f`4?Oy*;91eq+6m|3u*1-Zi@f&u`cr`^3NUyWrBpVPX2I`O^rjm=LA zP7jYf8g#Os_?(UE`;IwhpB20TCeKrkf9PcV;&Te9_d$b({3&=PpSbH_%|ln@Xd02X z&$`heWG#A=boY!TSa5r(?fP z_nmw1?^9kaX)h-_{6Jg;r}PgI1*d#{I|m(pNUPq7D^M_s;MDKpa)5dczj_6iulEhmv2XD0>PA($e)C>X?=3R zNjt&LLFaM;otf(UF)W)!^3TkQ2khuv6L!a02|tKjrE-(AptLQ=?lR>d%#{^blR_+3*(Rd*NO?=pXR-M?GJSLgZ&#gza1?lIO&ZYH3uF3*j7)@ zJNL&02u}T__WGgI=a`Ixl&jw5ih_69ykqI-5A)7R$m6HaVLbvr@@${rZ|H9?FFNgy zE-L<97+6E_E)R8i+K%Y(2jd>)svSL0aF%+EPw3!GQs-GOch(i0>vQboLFaP(^K|a_ zZy-2Lm%E(*=7%6>y2?BGxI<&XKe55iM;Ef6{m(tZ=V>nZ=goV_y8QawBZYBz+m6Y6 zDVEL*UdIqe;IBdtTM2#?GhS=Iqr+c}lW>+UZ!0*PJihyZ&gHxtXVgAj`_3H&Cx;ow zbR2^&w0*ic`ML_;$$fVJgl(M}=v-IdS33TxyWsaa;_~%8ijFv@!qF9n-_EZdeEvb+ z9ri4{y|>_gb;@3Dbi}U>jNc=lEHQ0$;Lc#b({JHlUDHqS51x1V+VAM_JMRj@2^ug+ zaK7Ge=b)pXVsALb=M5E{aE7DXIl9pLksgo7wHYD!-Aufr`RGFC`<$EV+&Ol}F@ovKx0 z_$P6}d4gNU)L;7-o%Zhl<(HolE)u-w=KE8+|Dkis72uGkj$b}IxKwaTnfLfL2Oa*U zAHiw)-AciUXTG1K`vzkl~x zIA5e(fen8bymlVtLPz~Rz4BJ<*VIY=hv0PgnD^1q{>rNTnvvHI3r;fgy(`^5&@ta- zH;ycF=6ud-%m4k}QcBg|Vr)1r_!m6BgN2UzGA?4bw*5{E&PtDVfewH2J|vv+1I`Oh zHIMZaI_b|K=ctN9r{9J0S@n9Q_ehRo(X=GKkfEM=a$pw41Ou|Ut=G6B{;>e+Q$)edYl}t`c>uW zZw2q1$GnXWKQJFrzvibz_go3~2q}^ZW@E7wXoMcUs2+mfIbqG5AML&YGE^~6hdH#Xx2hGoU z?=!)#YudT?7rK!B<#X1nebdo*(hB|`#-4h9N2mMcD3!N5O~)5!@2ydso_S=%Vy;^ZC8nQP+S1f;VTpz2Ah*&;IvG-efH-_zBE9RF6;kJ_-GZ zdZ)TsOmON>wfhAf{^d8EJJU-EP7V`S>hhp-IX;K?1>gh*1_;hok9~i1a{tu-U4q3$ zDhgh8k9Dhlm!Ol%PmLJPCpa-zyLQs;4jt`I_AmeMcJ%(Wy5L4LajzZ^^mjYBPkE;E z{#EgIEe{@FIj_Z@bNkm8{6-%6{OG*=d%k2{!RhP~CrSVM@1xd@+(2+5n)zARH*D*F z|M+S2{zih|^#kLlFV($|H9s~J{5T%*6FTf!Ufnw`KD3qKY`S9acj(}7u7`gMR%!)*F1aHMzd;QS4~O54(oR~(~=GnoO$Q$c0}iL{O>~@i!oGielm8{^$lBI zoqxX$e{#6s@9>Cg!q%^S&O`McPwc6q1wZJxy?vs~v44_ykB9fQvFnnCV+AjfNBczQ z<)6TE69uQ;#jyE@c%SkNh%;4izBcvK{y`UVTcs>a$fZReqYq#a|N%386P#z>-&AY7fJbs)LbZdn~Z&Re?*7BS>M4KlVqvj zWHENs9CY-@AoU%Jix*c2&S_&W?Qe9U`J43}EhU+Arur<_9{v z)H&s)uMZ3UaI?Rv<6v~m#}}hzOS5rhg5dd>cX+VNl0?S^H~(dOdqPKh`qB&}-&~&@ zb8pWD!Siu6^&WA)?WY9)y#PLgEK35D~(0;PtXK3*7F7Zp|G4s6}Ph3_*F%Qld=^^nS>Gy)9 z9iW3f@h$=E&}!Qg;$(|tK4%OaI5?*T&YQX~h?6XlnFAgCg8G9~H{2WIyv_Ds?B=#o zigylTy?jsnH~PM$9M_=Z_3h97V)1TpldqqNmm!xbujE0;z6JFUx#BzVk|#9V 
z5jyw-aTuKLokPC*H4pnvpZk{W13GYUuN<7eQiLT=$js*QK&Lov>#}&azuxHx#ChWI z{uFe4ZrsnpIE-?I?H7f3t@Qmq8E2sjuK(5$-?zJ(D+cj9H!`@h(HJbAWr~0kMS77yI#Ea!{p9{L6{o}HVi}>eGoQn9#9qvs*2m9gtAjoV%8xtF6CeAm9cN7sfgR>jX52KuCt7Iqsc!zsM(4oH7 zg#CLa&P5zQhx=yGfrI#u@>I^9mpJVm>;@e;7~jDum$@Ku^0zVj5jt=%Uj*m3NJWX$ zx0aa$o#MEyR^mG`q4$>{&eN*q{sf&08`$UfwF-%I_BXqiCZ4~(e=O&H&|%(JIP~CI zv)*La=hNKQ7je#h`XA+q+sxrUEp&4H!uSFEWz0~Cctv}d?FSwF0Kb4U?r~M(Y;(AG z44vZG*WFi-)F4hb9cSe@;oZ91p4agkT!;9J^gFbY?`>YEqS*g?QM5ksJ9Jd`lziwg z4n@+AdU5%~`00nYdFJJ?7wjA_RAb_obQnLNgTJtU3r_$2&4^QIh}rMZ!C%$H`3IlD zt%x&YjJdwh1s(tF@%7~KcEpeWAKy`GC-(mu?e0kYv$a+E<$Mr2*`F}Zz^;FH?Ml2e zWz2Sk4*tQs0i4|#dlF}u!@IZ8f%8?I@9TWG4{@3~e8&ko#c^AQMSM&?djN4x>F-KO zJ3%Mo<7$zoURh@d@zVEEc9QWCI>g7wCpt$DKP9{zzJ@1q81-jt+ zTVs(AnOA8#@du1m<&=Erg5xI<@v%$1*~I^#-&>S?=nx+nyciQl-pxt!3UzL!Pb$iMzJ;$+hOTJ{&{z$qc}ko(&0B2MfE z=KK?M;NUwOs8`A8`-qdSxS0bTI5>9(&fJX$iBqADnFF2T_*yFuKBIG+J-ZwwPKyqT zBjX-)ArPSS|0}NBN*j&ujaM#xlDMn7DXxrr(CwM>>wctt+)d_tmUwykDxU18-mPnV zt?S46+IZtAZa*LJW&dU3Zf;^Ow>%$EUEB9XxOgKP z^k3+if8qTNw1Z=9uMz)X7sZ$D8#=Tn?2CcZA;~S`+;q721)bvfS&zlLe$Ou6CC<$0 z=K4YxTwJIk-ciao@d5F>IGme-4($`)A@J09-xK1T7@^83+b48r_NaG*`g{&`aNrB# zWp8M%H*^?B@P55Ja-&wGiyrgjd$7JidCQM}L;OFBnf(kM{G3e0!!$WQ5a)YEGY2}< z7ySz5xqSOGajrBr*B83r{EYKE;FlWzo%mfG+BJ0Wb5!x2uA#+3zWz0jnS6*Ur}Q&) zcPn(Rbgy}1J)g7hpCs-So_G!PxiA?o<^D-?pQ5{xFD#?EE~}ErhdenIg}7Pon%`l7 z4)yIU)(_d2#URf5hURw|pp)&oyqF(FyBM2zd+I8ljJwdGU8mCE`7<*d7~W%#r(Jgv z^Vv{k;uF8&MsvNPL%T*hM|(*en2H}?xSmLvz}ARd-_Eo(J$UN%|^T&y1a6HgAVo!DdJwEusMm7C%M^v z(1C+_IqG+0c^=}d&12?32Tokk?t7NYPn?hXePdZK=-^MpQ$P>9mhTU z`yppsZsLt@Yvwth6z_x+_mpi>3q+l9>Kf{xpl%fh)@@ajcuOT0@}&Gv#0?E&vXfpcng z2jcW@W#&MqIQI3`zS^CMbGD!2$Z-MjC+PlZ=SLgXo%oF={1?8jHQ+k?`DNwa#IN2` z@#VY>I;`K=Pxf=BS)TPHZqzpBa?5k3RWx{Axm+{%Y+LPFzac*W|HVE%i1^1H)?Lt{ z9SjxYTi^b}h_hPzPsRo4z)4n-=Na{Lj3Um{F{-|j10C8E_Q_GNCr`!_XWDGD-JlD) zJ=yP1G+#84_y?Ao%NgALiA*i05~rlTFDdIQ`yqa#zJoK*Bu)Z-4RUh(C43tg64t(O(_ zvn*9!x3w}O*L&iQHN@NA%RFy{Zd`7ai9-F+|4{$eDK`)=cXji+1v;!-a6Wv{=tikJ zom}AQx1&TpFkAi2#2+@?yl#O`mDB#NSg2*&h%@ApnZxTAw}tT!^-U0N7jYsbHTwfP z9yi?<-h%`0O2@s#%U|2fgO1;k^R+gK`!|~t9wc599am(0h7R%h++Ss4_*@vM*9UIv ziP(RhoZu*Nt1d8?6FQFfE(`A}px(ubo+MsWhxi8_{CZQ&?|WNkiLpdA-)~-(=`!(N4p-%pq{=Q56J$imG%LyI) zjQKe@&o@6H&OklykR0gX=NNOZ(Y{QzCQpcyR&!)Lg-)KI!ubP~>sp)_#JlAX|Dg*m z{`U~yrOSKlHStr8H1|8`WWST+=i7eoiFd(a{Qw>OkxI;ivgiFwoH7piVd%iYd4Jfi z+q-YX>9p3|f1!h4*NE>lCw?5_&9C{#c6weQ{R$n&Wj|}HINy+UWf&E)U_dp5NhjP%8F_Py9m;_xYex<@B{$iua3_KTAZM%KF~E95126 zc)9)I@a1d1P1ob4eLd6celp@l()Xlg{hj7Tmb<{Bd`Z`P;wt(h{1kCoD&l|cZuS#& z@Hfr}fiu~ajyQ1~)nEm=5>cXPqd|14dTt$_n2jW@~*$y^9FCa)FJ+ThxdPF z-T>cifc>uIt4|z%hkgki{DAo+IHkWfB2Es6cWR&m=iP43e?{Ehj5xC#-uZwIoXI(! 
z(|M3Cy;~8du^t~~yMRt{?0!sJtQ~P4pH=Pj-}%x04xdk?j>OOI@J@=;JAAMw{7~p> z7vep1xK9in?1z2^&h$tb9Jy%# z@y;z({*dzt=msZwUA}bYB?WKI{`Fjr{XMGd9fuINZ?I_}DL zC-0+SUl{f;@p?3IKJPM@2Rir>=jOn9y>2{lMmWR?=)l1}CUCa+Od*bsL!5<9aqRb% z!grcMoFhZc{mHw0sy&Wh&o+nno2w|kcX8a;>MQc`HLlMmUSEAaO}01aFn{Io&uv8) z=V|*4Uqsv!dfhI|2_5rXR#vfIyp(?#@rpU*&7p&z(SG3P$)8pdXSze)2s&`;&gFAN z%k!)w&Q6DQFmz}y$k(7eC2npe&cGJt_5xkd^U|O9Q^fg(cgwaDe^-DxUke?dZ}78l zUKi!PU353`_FpvTYoQCSymLjqqT{W7#LxT6Twdt7-MFm`q8&c%cZhf+BAWXPbj)*E zXa}f&(iF#t=j(7j0XnoF+^YrW(cY89nU-6XOU6;?6vu6)6Yq?c2slfeR(ju5_A}^I z{V=a~S-#@jUffd`h<7xtxm?hpU1Oey`Wj z%=Lqg{pz-S#5?U*PCX%BjK9o0=z@Evy+Yxa#OpLr@#MVG+x!gX@vvixEFXwJps3l7 z(4igT-2!l?#`{8?51q{M3OeMcao)4{=y}E3Ec@rYC;Bz`8wY8PqG0qG#hv%dAb7St7VTd#Rq`AJ{og1_7OXPYLp7`fJn)%+2S8K&R`|ATD z6aQmkb328O?8{RAESjeHC^8$xCx&gsEs z4s?p+Yb_Jsfr_>%D{)FVoNI<|O=O}^=I-Vg^Ba0UZ({Yw=r(RukFqjBaZ(h zbG@KL`-r}d@AsS<>LN~LZGTxF=oH6oO*(Xn&U@6KQkFQ!R;lvH_79zE|NqPfd$;5M znXihQct^%N)KBYFx%{l2;(U7gh*gMFeWv2b_5~fr;c~izUddXncHc_>yl*^7#N|Zs zeTm;z=S`&jpo1SUp2D9cF4iK>TD|WgInbfLSkHsgendUu%+vi;))%^<=f8f|8gb72 z&%zCfpWLCpLKigOZOs(-6UKaPLi|d4Usbjv=wuwlJ!RPA^|ltoJM6H(2p#-`c@sFH zy0;-@U(w|Da*msL@$%T|5r zM*L+Cd1L5M??x4PK6GhEFXH&>e3`5_bkeWb|AjvShW8`hTZg5;vU}CHRFh% zt*1FILKigO{@uAkV#B1Kf-2b41zi~euoc0f95~rZU zy)o#(!96l?(oC95oan>N<$(^Im6N$&#B?nnPRSW&4s?oRZ!gz>EGEutJr4c5_Tskk zihNeN9m|QITAzcJ{)A5UU&KY&?^=)5#7m&xNtELqbg&=JZ-6tS`Fi3sbJ%Z&4)()* z5}dvnHxp;A-e;5cgD&X);Ii;N9Pqb2*+%?^d(D1^4)w)+(!+VQi#U&unmN#cgZ2zg ziB0SKvwW>ah_&&og-xC<}{Veeh>3t-*e*vBOUcCK2c;7u2hFMSn}2dG{4Z_DATrKe{dChf%JpS3VH0;RQ1f zI_9}7yoUzf!nt3FH__q!b?8vfgkoGv)cgl=V0T$h=yW&J_Y{7f*B9shGt~+8?$>;3 zDhK;R2R|atqJDQ$ha*l?eZEhQkI;b=Anx5={w)%53h4GPIncpQVZ^(pHEu;E&f@)M z|3Sy+R9)89IJ~|e<{y)I6CB2E=up3D;`=qtn#3heI*0i?bl_}R&GWD2Y}fo$3z`c<#?2uYvkUulYOi&pDj8 zfe!wV=Me`b&q|zIr&PVAU!hYRd!91%{Xd9P+2Ok;-pvo~@1g`O%T4?PfvTL+p5DHT zl2M!!{n{ZP@nbrC*91D)bC8H{ck2}*&LoHU1|2xqPlkUlS1m@IR1R?oI&cb#?@HcF z;3Cdqhx`O|;GqAZJXfxlA&$!-9zmx#zE(wXkI9-}fjA#@dz14*=y0xO|Cw*)J|=JH z>2Ek+jq;6YTA8@9bbONS13L6Kyc3k7_>;*`9&PvZH_Xex|0`p4;&*kJ$3O==;vEBU zBEGFjoZPp}{S7*B=1<`JE=zXQCC>bNW)5_U<7eFxd5#W48W3mHLvz1^F6ecg+X@i* z%29oq5WmqA#g}m!Iyry9JPh_*U9JW3Lg+Xk;}vwUAM#w_lupowIG-Fi(1C;Vi{Q+9 z)t)#X9pW%_isQCEx8n1Kwa<4V&L~|TX(#CT9Dv)}lY-BoK3dj|csum@QOSdj?e4Y~ zi}%B~Ht0>ff)06E==gq>uk}N`ryHhQf8ssppxT9u1JEH3EXle5f?wjKIu5w3Gh*K4 zpKUO4^HnkT3+T`jZaszjYioECarB+COG z?2q*;IK?*4AkNiqX8S`ITwF>#llxom@pFh@F{XMSQT7k$xWC!oTio)eKk*($_U^sK zSYn+!_|_ug%u8L1@ooSgc-Zy8UZLp*ukF;C$|WB=Cu)P7#}`sS^~J+0p( zm+i#qyzEx79{V;r{6u6et?e0GyA);%L|+#-a!5S zv9zC@hhaU1auq8WK%8|B?{`B7KltoPN8bgnljJgSIyN=?0Xp~r@ek#RbmJ;<^7xtk z0A0|2a9J;|3U z{JbwB@;7tdKPJwP4yrt|eL=_h8@H8N@CvMdPP~P>T}U2u%(I`bsXO&G@xJPJqGfxP zc~sn=g1xHteNUVuz07up4t7WT1!rl&&&27e-}RT}fexH};=5u+zJ4Q4U59syp#ulw z0m^f9TZs3+<`p+Oe2)Y=#qqWNKFIgL+V>1YoM@w!edIU-9mbI{8*d!&b=~lcBe;)< z@>MMqfw-q8ncEk1GJav754@(Cq7bjL!*@iWgWYzN*q2kGULI-=HAHaWs=Tj2r+EOzII&g5lADoD@(-LR>N;3yK z#j)QX`SLOYaYC(C965e_cYnms!uK#x&iw8y#J}6g%!dx^sTA5FF)zM3Rm1&HJ{0i@ z{10{iApYUJW;;L!dt!e8oJj?96K7%)GY2}@v%h#}BH*`t#Mu+qTwmyd9*6Dwf{R`i zBz_pZev$t2cAUp|f?$X9hl>)wb5ye(po2YOUvL_4E=ioN4&S$f4ji0E04H9bGQ@fO z)x2JYPI25;9&s;cMY;;aDG@@|OODIXspf}sn07yOI#!8zf5tKU**ibj^XeViS0n!5 z-_3li+k$IH*cXRgm;GLo_(_VI`Ou+Xmeu{T{o_H8iv%w5ha?7djr#?C+UW%HN52?Gu^H>y$SyJFm->F&p;phTo89f<1Cx z?MnRY+06YGI&kovB5+b%?@64f`aXheN6^9l_eH)t`H;TEiIu}#U+9ANkINb@-ml5& zGm!W%N}B5n9qPNu&GWJi0Yiv$w~mibia`81UihPxJS{U+@jDoZ>;geQRHRek4cHP5!shVmiB`# z=zi?7nu>SuTMarw{KfkGr1Ud%u-}b4S#eAsxutHf`n&kxIS2s)V$ zYb?%d_$9wdyb})ogbwzbB-Zm;ZrmZxKpmfCyMPXy7UJB;u$g}oC!52&DbP9jVbPGs z#946gzxcuaJFr*~QDqfe)PJS4(v;y2i1Zr9Mkp2%~cU1vD)jyN?O 
z&YM674)*=Q$+Yhiak4p_e}fJj%tyhAH0&F3F6=bd3pywNx=M%m@N1s3kv@;}@A{da z)iCL6dVlIlW#Rj>D6jvqoWwipaNhws_y_S4oDZY^Bu+_(xDOrt13Q4zxqJcQ9B^ol&;@Nj zdp_-Pv?9b`tJ|aWk9YaBpZiChc|G?apg8eU>9{8QDRk(kw-ZK)Ts>EbJ+KSjJ%L>c zO)o{f%OazlrHT2D8Ch?mkn>QL;T?}#B!0)F&e<%wV4;oK^8Xa{nA@%y$)#HrKN z+)tqc$1hY|+7I8`xEgVKRx)#-QyjO|Y5#e87d%_!n#7sdNO5F;fsW7l_*%aU|Go{Z zL%gEh6i@mWI{0_W$?wBnhRx*h?_3dwLyW9X+$G%=SNaz^__wC^Z|_;nvc2x`PaYQi z4F0{arZMqn4mR5pI`{>C0;hb#=EOPEz|4UTe!=^t;Pi^pnm7?1@>0<8dcZ!f-MF+rd7>IWk|BNA?rwRQ<3o^79>FabK}v?*YW~)9XpegN}LjxScQW z5aLDNVfL?gaoe5;fA?iL@%KBdyJQ~Rr5*h#(!vz4moL-vEc&%2_n;bmr^zo8&_m^f+$6 zKY#T5EaIQj^J>|?obJyfZ;A5mdN7Z8dmZcw9r`2o8^OtSej#zvINU#m4ji0^1t-Jg zrNrr`+q>-7(4k+)7Vp&!uey>r$sO*cLC5{jZDp^+^P8Ln))H@;e&1Ny9lF|9{i2_r zMbqcC-Bt~;KXJ|4NW83i-Y9v{>F%b_d)U7RG0S`o)*rC{&_O$h)6UCv z%MNkAvS8^w#IYRi%|VC$iu)EQPu~~^h;v!rKau?jy5RcLW%0gelfc8oZ>{e?Nk2d* z{eW=_<()t8IPuor^=^EVeke2bG;t0+Fms@T{rX1Wyj19h=ZSN};eHTwU9+paZ>A&* z|Cir-xyS#~k3N>C-IWsO*y^RfMEord_idq5<+Oi?Fa7H)#MyIQ)mP3hgZ>WR&+k!* zem3gL4dN!drMS|+&`JNIzrep`{qGR(s6)JjPSw|CJrd`kN1y(iIMW^M1|9s1_j*vC zz5$Pkb78frpX?vd1>HaF`SE`Sy^K{AIl0820dpgY1p$od7+w*JdQY9k(5j}5}andQjhIh_kkJk^95^ucTZ;|~7 zI`{|U88~qlr6f*lhx{6JisQD%i+2cfjZI6Obb5X&>jxdr2mP#Rsd+#2P^ApSi>LQR zWZZ===yBC;m7jKk@;7N?WhVX`hxsdX{2fKNwNA`))?CU*yvGjnSLoz8m{RQfm0D4V zc)RsG#?rsg!N16lz`w)G79&od4(k10$$<|3#rp=}blzT)I5&qXj`S~d(!a=$fVVwU z8RC81qIlB3&;{M!{H&Kp`CZJ8>&p|r#sf1Sx}f>?@6|LYREhYfBAeqQzgKO4hkSj! zYQ!m($;^@O0%6|?_Uw_OCUK^}F#8!g%kc_2j90x%#t6Tq z$xHp-x_zD#Cs%#q4tsC*hj;TFd){H#;YP&on^4(NwkPl6x82_}>NO*N?!0C{Nq=LW z3O{8E(TX_Ps+&2`p{~Z{d%j2+hs@mMVvc2zbX4Qblk7)arI;V zF2oz6^UacHj;r=|Q{j1c;@#BwOzBr|+YR#E@JIRiy@|hBpO=w*=#U@Jr33hgjnkv~ z=iBRbg`w2Su8}Af>^JnbA#F?$%CzEj;I>oWuv;4VX#EIfCZ}irl z=uaqT?D3PGrA-klZ@Kh808yM_++zAxTypZDhy;#_ffPXapK?L7GsZ5 zvfd}Yb;Dd<=w$zcU17&f86FXD{~dFDgbw~dyaXroho{6z<}eOJ2Y>Vx?_Cz!{*pMW z9O6H8s(#oX_Op(QbzkIuZ;7`{$A9S$=z{KtKi@y9&EuY{e!o%ApAt?)~;z#qiJdxNr{KnMRKzXQ%S|4^TP&42cNWNs(WDUSV} z{FJl9G3Tk`NPl=67txPU&O|LE5&zA9?0-p4a2{%AwP?h@rpE{AALycwnzp>4-#_~~ zm>;0Ld6UExh828QX2D+fzjs1O-c?a_o zKihY+{p9;582?btPpu0RXH-1%Jz?n3-wum?$-sEUiPIvZnFAgC;4jWWB{}3G&f-Ff zBmDrKYG)WXf9AL5a-8csr!4VmRx+0hI+QC$J)TFdKH(||mPoD=jXnJ- zjM!hE+OG=n8pbyBpp*Tnq_~&kSId`pwNseu2_5{1aSrX_bEMkDIgr@QflhJk>%(r_ z>k+4OCdHBCqj&2!`~0*(%Z9}7R?y7%E-t$)yvqPPJW0`%`0?FlJ3t3};=C_7Q?Iok z&ha{C4s_sP{}G(cQ`!(Gc5^caI`}0}TS!PuJPnWasC&6+BdKt@iRKSZw8&JzuPMNj^j;H!5IT+z zKhLv>@21XcGmLotgH*jG4?12S+P^;%D*GtnWp`-T{QVL8Ih*eB#}TJULx*zd_{sHj zS$KyX_V0ILBJsZXDxRDVK!^TTOUx^>j+siFw+`(XI`p@-9r>Q_u2M6JbD)j6y+Idr zf3vScwuYTcd>{QDm~7A9twZeJPm6fipZI0#)Vd+d0zgAL!s8 zthd4WTy8mWZcQ+ApaUmDy!15xo?Kuxaq>+!bD&ckw{>e@O5(V}tS64&aK({!f{xqq z&v(7VyCrLnZ6aRBF=ifg%yU_I2LkrGGIksB8tV7dWWRt8_CmfAoE$ZF5of%Ey`Y1= zW{dnt)|C5*b8@!1e$c7uyU{nV5r6np zv)`eUc`Lj_3wthEbc=W$9oi*y(m!$^t@QMJ#H*p(k@OFA=y!O>0OhLJ@F8)&X*)^| zbl`Ll=c|9DeM+3u4sifFaPU1Bl&9IV7sRm~-cy85aenR(ihDwFHoYOvp?PNiL02WBFl>bK<17vkJ-h;z`PeouO)ruG?i*$?96 zZfvd}bh3RS-->eWS{mx}uldWUEzLaWnCG(kigUDKT7)BB^(u-d+ZA-EC%!w4ay6+F zi8xzpo67~A>^GT29&pd1P_CYRV-aV+e)mt-4?5Iuqaf3!a&0=7fOyH%nC%6f9IwKQykGXdNr)F-zvn39H*^@Un%ocHdGaHl zJ*fA5@txJu9a0dts(z17)*CwMSG?2J!eso6 zAE5IH|86{XSr5bWy7lDte8k_-*6bJPUS(rGvb)1vsgbwTU3c3OQU8~Ks zH8~D?*6Em!g1@&$apD(@W-cdm@H>8kQz&aG;-t{;W6Jmf9XM%4yt^H~9C7x3Gv{-m zQylxd+#T<@iBsXT;>iALewW*2r4aXY8g8mWywC5<<$_L?%h$SjjPp6gd;1cn%v;5g z@d-M_r-~m7$0>6)rH)Vbd#Kft)F$qfkBTeXiM)r3b}}zZiNQT{{?kq{PlkU(eyd0P z@iENp1UmQ^@e7oWahrwf;66bAF#gX;(E*YK@dVk_J zN@TVdbns&&v0iPmV-Ru5==XGGJb_O75&3A?ElZ1G#2c;mcVxVT4)%v%!Re7`6mbSP z?CU_MIQIV3@!VMAWYl@Sf7hSv{4OIV690`J5C0wCWu+4D^wsV%mH0kA&3=Xs_QZS_ 
z_RN%LCUN%bJf-vtbl|{G;M{&Yhd4z$o67?o{DSpAI3w5j6DO?BYe>667hK*H@zBHX zu$cJgb-qsWp$nRCfB&L*&gH~!s`Hbw-#ER0fpGxkZSrLm@qTkyk3t9kp#6h$ZSOkb zyqjb84|L$*9xga}R&63qCWm!4bl~8765tG-vyC|ArKc^cd!-nv=;#XoK< zhnP2v$?%Z)%N_FZ(8+lN;u!20@b(GuIxI5V5jyw-;|w_NT`!1}VVRi&9olz`Wa8b( z^>2t1u+nTl=z<syl1#?ABf*X$9vhnp$m?WaT0d;-TF%WE{)9Phc0No+j?J_ z=e2t>SYLk4Q|2wB__Do1C;gB4H_Cf6dT8SHOK$cXbnrjoCOF}*gdoGe#!5T}%n*{{$AH}1vRm4|qLgjGBl zSG}8;xU74tInOnEN`B&(`)baYL8t2NXWfNqGVd6abV9sknhxtu*?cmg@k1icF zYqKZ+34ejVcSv#Kzj|#hCv@=hB(dLiu6ilr1nB!Uvb{iu`og{_PtP#rh%-=g{+(a^ ztU{-kP`r*k$4&es4(D;93;Nv!U#qa_-#ZFcA^td>2bSXjbm-qZGWeY6wtAZG-}d## zk$csNTP1Ru=|fuwTORPk2lhP(4ie*eF)CBeD#P^ z#37DDhjvg|%&UgJXh57}4&OzEF6eoQpXDd`buTp`e!eDVdqNj9-~P^CwFxbVzpth_ zUOBz9hkgRPzIC@DUR{TH3?2M~d^9-6OLQPkNr&^J(7`{lpNRt@GmhG@%mrC;t5MX8%LS`6!)9-oJ72AdYqFy=y?9% zvanx^dKRrPjd-VZ`DDC=4)qN6ocBrBr<_Hc3))_?T+qpOg1iRGb@AO?;+=Y~>?X%g z=+I6|YH+>-3D(V+vEDOZXeGw^QQ;R7Kepz}dPC=FC*oY~t(!}TgZsXcGgRxq!MYyx zU9n^Zahjhnw>Rh%#~yF)uU$i&Li*mCw41l_25}GNyi{rf@!cEDet|CNc;m8MBL83G z>Sp5S*7r=MJ)wg=F;T^n00dyn#-Rt2m#Ca)mqj zhIlDrseUDS(DArxzsJxe_XpxN$!w03-ri#vAkLYFTmG5&M;e;T3muQQKku1{^E%Ca zz7y~EL~~q$4*u;V;$^%IA-?{aCw1*MbD#qU^D+1%V0akf`cvvfX0j<3)KH~yjDfnRNWbmCXi>uwq6po9JJ-a9x` zipM6-Zil=ubl_lJ49?^i@rW}_pL>zzfewB^9ub@lvl9}hz0RLWKS0O*)xO`eread! zh1Ta@q#wMShuHbm|4d2z>b1=A-CKT1ac}SHyEMcvP{GWH4)fb=x+5>$ae4KHUVA*_ zCHC83Z{P1e#Q&k!wbF0U!T%UP!I``{6LE?-#B1mj$39-hYn6>S<@EQPrN6uzFWna6 zFv^*xOito|(e;(>3p$S9zE%m5|N8bd5Ahyo|H%9objW`-*_QuN+|&2;y2EWH6#Iy| zX67eu?|RDbvYgP#xQ+ZW>OHDrVdAasW*$$WgP-xfBsi(>7bDJAecnK}7wEt#Qi=C1 zYW8vw=hPB&d7yJ@FFAe65@*O^bN}$Jy|}EfVx6)7TLt2;ykagVbf|9`vHpwtwlZAF?U&1|C#A8CPUJxq_~E`ZkI8 z-Mu(h^MB??#C&GO#umiibWHJs>lc{cz`wSMK2jWb9Y%ULU z;6xDTVr%W`M4SOX%pB;@-jL@-dCqR^Mx2r{qyNvcvb{ma?als<%HXcX7ym4kpH|p~SC{#%w?HIT*Kf zN8GcW-+v_WPGvFkpksg7^R?qzjwRmx)aLrjd~E{_zCI~k%XGh-7sK}%(S8DoP9%Qu z-xOc^2|BcAocjW&PME30DX9GUt6KFE0sbosQO2K4N4%D=);PaH)5f_-PS(wPKMR(1MzO??;y(d0-fR^KKNQg5Ayk>IH|r6 z@4J5gQjT}fVV&N+*qq0&E9UW>Q^_gvGxyGYCvGu)e^Is@=#ZarYXdYNcs5JxrMq{- zKgd(UKN0^5`R&&{X|AZ|eheKrrA6MhXx^~Isp62AhYp*XM@B00?>hT>o(gv#MDH@DXh;c%lHc&IQZTp>Q$~=GU6O>ZLSw|isNhLJi`5G zc+OPB>8a1<$bJMJ`cd;rRsJgY;-B?7_TxR}dzF^BtsU-*L5F_ir~6Up`FHR6=O5#l zm*x@qmsTq?5Px-dbN!)%9kI`fa<-k2nK&f|nK{rYj{W`b;#IQ~XNE41j2qs)|7~AC zM2nq^_*wg_a!Nkt%R$dq?CZV#kNzZn6Nmdja=lkt`zv z`duE`?xBOfFb{#h!gVe}oL3I_S)hZzFwTS1IZp}VjL`Smq&=Yv&Yq3M`MiaRN)z8# z&(|a$x}f=PYjH@0wvQ6Jd``i7OE=ghM=m4xCBC&*KwT zBTjSu4xaQgbWZ-Mo3|!$e&}&P+Rr=xxGmpC!~XX@Pt^DFpLK~}&*45Wbm4pQcR=x8 zCU^rM)hFKTxaK$l9qfm83eNFyjfqn(otXn2IJj2;&YOnKiIdjheG=#t$8F6L>$>W( zTN9_Y-UpKH3_2AKFmH2N7=KW%j0f5g?}OgIlkEdKl&f?&K37w>Ye(X|t)uKD%LQGJ zGaKo9#CWeA{=ldvCzkl1G);A5L^}>o7Lx>-Fve|yn@wpo z+;HmNI6~ZjKg{))`C;@E)IUVIQ^e^~!pwmV{%9rUb3Ia= zBTkLg=6n)#Xus}7d~c~ooQuRcw#J+`x!de6YFAd3WR@1oP9~m9Oz(A^dE4Fu6;tBkU16S-}O6tzur~v1@Xt1 z{x5u&btVq4s~Q%2L;S+E&31qe_C!2_9rncgK%7Pn@A^Oo4*C%|y-t26&KrmK`=L{u zpYJ*s;rF`_^#4wrBYGYr=gZLX_~x>bE#&@OIA6%`zveNY>iM#a_t2qS_^t%%7w%&i z;{4rJ*;|$ix>g$#PVsHu&y&B+FZx^Ztr3W~V1T)OK!^TTN%yx?+b%^awfvv{Hd4gH zWIdt~f0!bcg zy!k!M?VsPlaBV1MX>w!hmN{F%p>TziTTKb*sRGSJC+cPx)2x@p>*Y*Bd(6 z5BWFvAw;Fp#M!O)tEC^H1E*{iJ_ovYQ+eWCTW&57bnwGz(GMces6?C-dc7^@5zqza z2fQPV`tE8{jrd{p{aHEgK*w>@eh;c-#+t-ipx4E+yz(AYQSIO`jn_YFcX+2~-w@v~ zKzR@TU5EHSdOa!q10DRcL$sH>^Xe1lfx|itI>qs|strv??}5jt-KnD)S5pX`u>`0upgUuZ16vu60+y=*8uPbrd>-mRlx6sLU`&fKODPpRg#5?8i zz8Q4E^)tK!i1Idi)Q9*nr>T0&@<<&kG&e$Dk)XzI=%>fjEQpKAW^R zbkg3)2f$8k111x%hr_%HI@AyAO>lBen@*fP`rS7fzo0|?>WKY_sP$$O=ZZeRBxkp;kQavz^~L-coW5Z< z5hqkJRUTPi=z@zwYgh93D-Pt`LHrx~9EkKYbXjV({^fj`%kp2x=O&(H-b1_xBhC3I z=umHr|ETw?`}>J=t*he5dP9fxYBk+)S7fepAFe%I`|pi 
z14B7uwKz_k6jRI`=oH6or4Zk5ixuTGamr6p9O(z>c%R6=9!q!g9PtvZFx$`DcC$l# zU$}3Zi^Q+D++1Ghw$$P83nMPVj)C6-i5EhjgOhO(I`{+gI&cn6x=x%c4)0h%2Y*}> z=kikjaho`<$g1B-e?Z6a&us;Y``MGO-6LNAG-e)j%yU^!cXL1MIOHMm(&RPADdFYClJNmL*>8}ShJT`jj!V4b z4(%N}_-B&1PttzRZ^U`n#q1yG6vu7t67la%@5IEZ(cj$upyPODzc=}zP;%m}cUYg9 z?@jtzr^NZp!=Y0XufPOVKH0CKYor};uzlGmrziCCKvUYE&s10Cia zbu_=;;n#J-S3T^Rcf=QYhNL?(5`U7zIt@DIAN%+G_6^KJoK$)pFY7PA=eHwPv}~V4 z{qsFPjQ?ndwMzX#+&{EkW&NQGy5HF6CB9K}6Tg6ieZAw`zc>3bATROr%rUpy)OsA0 z{Sxzf*kjeKg2Zd$upWjE{Sx1k0_V)YqQsf%u>OHgar`WNrx~2GT}l$CxI-Qmx}g2z zvXCzVf57H4#P9F0j{_a*i+y)+X7;K;oB_V(I0PLy$cuwBr9x%m1nBc@ay|haI2eDx zd0MVIaiThKpi>;T_2V;tPxiMgwTKhVVZ8+1t=UcfmmdUgzPO+MB1ApnwJ2b2uh4y7 z7;b*qoq6>BzppjmI?oG}uWvxSJ^9UX5jw<0?gxI>DiIe?errP9m*o^!#s%mQ7b|P< z#$~BnE_%Mt6Bm&egn#xvY(f0Z4)3!-2S4NeX>e}sYeSsl5zXxmI{2lTn6Iat)`2+5 z!kYa8ovJ4CDfanJmld6fS52QskoNO#zG&ya=-Zw6OZEFe|Bml#RS@rgTqxh0_$%+I z^`0DWphLUbx?z&f!h^50-|g=elpWNcxHB|YmRr78kW;s-Pq8YF-I8jzr(Gcqpglyc zIhgp1H>z?67e6r10%vEk;lvrD-}#dq=+HhpiGA{QcSjMY;TCgwpabWxbkFIWPq{JU zh!e`;T`B0$&gY8r3MZ0JBF-%RzM!1PLdWgg*J>%|Wo5QcCEgzWex__E(4n0i-4~H_^-3}RX;j+fNVb4h-AJjOSH-}#gEg)X>p{_9pAC+3Y>LHs@!&Fv66 zw4dMVabCS;)-}Y5w^eau`++XYn~g&b{HW;hGx{~`H}mXz;;lQac(UHm!Ot%3=kCk5 z)KBo`pYOg*68-PPfX&1o^Uz#x=-_9ZM?yJ6{;{1nbM*V|G9Ew&KVx1C&ct22i8Cv- zYA>?B&;{q`xZ*tE(MJ1;|F^@vSLonp?6aesu|6CkPAG?as?ce7=sc01^|l`8H;S)1 zM!b~I%zlOre#U*$CUg1?YMb(iW881i_Z0D8d@}nP;|2T-zoXv%<U}O8 z*SlPv+r(+?aGwY|uD8pAT~Y565APALWmG3FSHTTPSPO>=4$K z|9_O>=YMG{oBau$`YX))eXWDXxm=GvgeBf5o#&MEA?Pq4dRKaVwbvDf>v7U;RS|jn z-fJTgH-8OtIiXYK#5)r%3+pJ<`)sqQ#EYu$J4^dP2R~r{(!)s}lQ_xscW@*JI>oW) ziLTs;L!6op`@G(bi+H`EpfOf3|-Lvaanc6IjA!#PdpV83juc^>my zsy~P`eW5viK?e@@l~LcGiE|UDj>CCH=oH7E2lyC2A8}j`=aw+;1Z}sU=Rz*;BR`M2 zQIPm$dYR)6bo_p>{X43O=M*L03x|CMr*lxqm%@%^{7Mq9fJ1wR4*tOS0?xexWr&kn zw`Up0paTcrF$E_{s0zf1KhoSjpi>;T6(IIwq8_S5oWHbRW&eUsj!PRvJ}2#nYQ!6% z^YC(9f)3--(L{3!ehW9$vwlk=@`RDP)g*2goqv~c4?4{2YwEbysPx+efjMS)=Jl9= z!JZ5J>JtCH!?`Qyls$c|LFc&NhR@c3I1P0Cl=X)W^`BioY1-O7_j<~QcO^aLvzie1 zgTr|>=-}^2z4%9I!k zBL2HV=C}u)jC;k!cZ4<-=tsN?I!`bC0iE;*_M6b|O1v0IyuD@3aRxg01HZxPy?Q8d z%Ifup^apfkzbiz%3N>RSan>|8+Y365S1xN)_P3Pp3_o@(@y0pqCqajHg8SU4U)$9a ziF2}qxt%~4-1vxh>cF2daVqhP>-?YeD|FJY$QOb4$C#PKtK^WEgbseiy#{cuHJQsC zooAAMg%0+^{0f{7xfc-UvSqd(bV1wCK7T0~VF~f8JMg`mzu4z}iLWjveqPO&c98Qv ztXE)%r^{CpXZjCwehE7G1%3kO-ud;!>Fe-4N$3>EZGC^q_X7s{ZzfKP3#vS_-9abg z)5?r|U*$`)?Zg}Bz=JNU{)bS{C*r%AYclO7UihnKdqIcxhkQQlb@j(S;?%pPI5N&d zhxrB0|9-z1kSs^-EuM81?h%5&W7#3%$I^V+KA=N=@!be;Y7{?4oR$vXU4u?>e61Fj z!oJGzmt}#|_ufh3B#ox}o1CXW7eePPlBMqU%Y732_m0l3JxkoP>CExj{JkT4zdB#{ z0`WTeD4w*N>{l4qQQwJ?1BvrGpP2(4?2qq&fpcWXHR2rC=Ne^spabVyQr>4PGw>F1 zcIf<#NeD{cxt&ysiY`5O!Gu&3_=+o)lrZk@(5PwuVvp=De;|})2VZZXB zpAs*T!+lriU_a#hz-gNHC2>O3Hv1DgaQcexg&m6emN=Vpm^sin`Qhu655yU-+v&ga zgZ&)%IowBM(jKK}>tqbD`ul#1VGj#A5#wYkaT-b2L37Oi=fe!w{xDL+i3lWLaBagYh&;>o;u-_X?G&m~p zPiwv$H_i9P?D>q1MPm|gXlYem$#cqQpg*Dhv%YSK3XC2P}LI+MWaUbZy>g2>3+R+?pZCeF=biX+EI z=&+988E|RDUkCem@))=;gnE`OlAgGi_4-W43Ft7-Kpta4kvD1X_FL_lXW)H6@bAa{ zo%ow{JCWlGbg(1dr2{9+sVu}oV%Pt_yjAkLDiiX-dm-S}jm*Jr<+LB~b=InaA;$`C(}!+TSF4%BUZ zh{%4Ko~8nEHaAh_l<^QcIWNI}6Y72NNhRXV|I2JY=->zJgMhPeNj2iM>SpFZ2To=Y z$4d6BNt_u2%pB;z!TT>LPxA(Ki4$4p!DT#vPH}v#)gm4qPuYMtHT3y^84saD`@d4) z^QSmBrZ~pKs&^U_*QbxFr|iEn9@f$h&HFZArVYb(dgdMV#JTum_nH$w#UQgCp@SWf zhk_k%e{4&f?+*Lh&?%1ny})PXI}#_{Ci6PeyF8IS-o6gomG~!nn)%+vTep=?_PmwLls~4JLqKo_(Uas*Wi2VKE(UJ)XalUzGtvpytguaZ-3&&Ii`5By+fz`WIyj8 zv)*9hY}4K66%-hr~;IMfHQQFwFNK z|N9R*cd~{5(|66T#)C{r)bE#jetnyx;DKcRo;wvI2de?W(S9`;Nn?3wS_ z3*x;$WR5e?frEAq|K^|ZhB$xkFms@TU(Seq*saY!5NEmO$ofI2>W6XP&$=9q$JfEt 
zzYuTNPE{`1-=NzYTD|{_bw<>aw~Bp!7EP}++*ZW;?2o2teh@#=YQ>l3g)ZZ+9XR!hSF1TK<^CAGFNufA98x-PRd# zP9;@@xWq5C%ItsWWPfyv`P=n?--s7tt=T`&!9U33pg%s}mzX#`)|)xd!9U2qfD>nP za^hHX&Gv&X=<&+FZtc@IHSzn8HuJq(x4JFN3sC-kCDRi>ufsVs=%jyeUJkqzF@7gr z_$g+)LI?jKp9#+Cz%0bstj~+e{sSHSgYOuE(=bvF;=I-O^JV{mE;#=@7x%h`hsZ(m-(HB8RGfsbKsH(9rJvx>W6thVq@0| z#H*;!C(8UAbQl-k-uSD=?rq0)e$0NJe0~1P#Eqx>iENkhJb4Z6;LQnh51)8snxVzT*CF0x zjZ>*1@k=@Et3ikQR!Q}l@*pGIHzm#ly?-d{3mxi<`-`yurZFvv;}_dpU+6GT!M<*m ztkK()-uuryWs=B8J&oL!_;+8L^KHXkHEoIz*PR3b`Ln!aj zUj2wS%i){^bnp-IMc`adI*2&!JDB|g9XJ^)@%gXE0YizC+Tk2Mbl{{A`y>7PjU>+a zY3A}kr#QaWWf50~FV+W_VoW6NSBLcsbm(_} z+M(f!bk4VF_-s#Hgv{rSX?c*^W2=mz*kS#qSzl7jM`6x>@wx!tQ55x2-g z#g+XHI@#Zl2LW%=&}GD%@WgCS=-?Ne*8?Z}qE*C+_`uA84t~M>8=TAi))D90Ud54q zfi7sjxU9!+?$6t*ZX$k0hkOEbC@1#wP|l&Dwh^b(Hgh?lLx0Jo9pYazLDi0v|G7Vb z`2_gm7w#l}w3B9gLI=O#{0le>%j_ji#lOuQ=ulsb2jJvPc91xozM1O_UC@59uh*l5 zI7^IQKeltnTBLeH6A>P$c=6oS^@DKVC?7BEn z0C6tIF>{~;$1Ubx{vj?Cr*RT92Rir}>uZ!}+_tO4xvBHKvj0IBw4d$Yhil#bCh?~@ zgRCYzZ99qf<& zDR4TaenFhCx;)bU&;@6I++PPjAod&LuhacZ&eNeoeK9VBv+(hI;9=up4rOQWq*Jn08-&#%H? zsCSGm5sBX-xtR}L(0;a`Gb`023i170s&$SmzjyuEZMAQZg}(2Qb9oHnpKv&D03F-g zes2Ef{FHbrx0}~x^4vVmCBd!-9>yomat97{XqRZG;52KLh&bu>`%SW+K&Lo<)_ifl zKEt=9#95QX>}TkL_Or{nDCRSN^h`td$stdo;>K{L&8V zD(K*Ee6JnlT=qRHadtW6$Dsq~xp*(;-s~L2seM}cTlxb!v>VK8P@a^X^AKmNj*HT6 z&;{LY{H$$az1Q&1{KPM;=Y^6F9sE*3`=v(6#+gbz+~Qg9;ru$vzaUCs;_r7DAE1Lh zk@p1W-nL@Iso}6bfe!XWTmz?3Qx|bMemD0+=z_B+><)h63T25OH>Mi@r9Ghwn(wkI ziFX>V#;QpCJ7LZ93+R*`+*TyEre_g&;X7&(EmV)Zh&cj(|} zygLv3B|Tf4IG-Hu*+2&l@<8C^j#ZyHIeVJR106UMMV?`0tj5Gy;xcofQylxcW@f17 z#MzzI-0r+<7xwpC_C0My{Hgk$t!#JRz2EZl-FWei;r`R@h#yJcyOrYtbjUZ=x!xfB z`0=y!ezeQNca-3#-Xl8__hl5beW8PWv7QDe>+7z>S)t1(>kl2`Sbg2FS4DdlK0?bA zo;Zei0r&Fnkzr#If=z`|E zEqo^we1E?Q#4mDJm0!j~=+f*y{C>chCZ2N^XjkAp4mE{%Pv4sD0Ug?7Ox+%TJH6VS zvDEh6@Eh)Bg5Pk#bmI4jZ03`_tc!=}H@XwRHD~b28 zn%O_l!9Vy7PP8{`h?A|ccO1-5!TG#m199p&?1MlDKf|8j9BjUYIKS!rX6a|>g7Y)> z^}$c?yMy>O_5QQuLl+z$`@i56bXy3I3J9 z{lpL3*X%Fo;P)h=U*?{Em^c^pey{A8&?%1F%9MofYiC_`oH!Ts{#H&8a z%yYVrE8|mz8;^+hb)wns(7|3svkE8z8FI%C1&3jI8@CS6T<0P>z3^O-0aSD$! 
z`xQEHaE}t@c{DIQakfn|bD#qU@9~1OuXJSMOmf)AhYo)2T7l!%xTw*I6HnJm`V~5y zOQ}*O(Z}0kHhad8-^D&e&JeMQyUyYKA9ScE&QGJB@eao$&XvJtdqJl-eir6u;Dle8 zkT}&wDvoSt&;>mn+rLMarCCzq&(ZfBr5*7-vNfeA{qG%@=uJwMUy}XyPSiiP$dfej zNlDz{y4N$Ie-|fC*a7DCA9R>!7Si##(}i1) zMs@y6#|gJ}L7WR8k);&zUnV!NSE18b3-!i0it_ptEK9s*Efi1o7wEwGEY8DydRc)u zJFb~I&?%06A8+&U%EZa`)LcLFKHkr-ABgv`JEp5nyqV$6euYkz%h%c>?vvj+S(7*+ z^*ivg{X&QK+oIw5(9hpa)9u%7MHl(lMBVBVw@n^ZJ~^+3PWlt`EBLdpPXpo|FKezZ zbnqwkSHbbW-IzGh>Xg#=H>8rC}+jyeTiSE>wn?9t*PR?&hrWbi9bTG z*JZzePWB7z6QI03q75b9yf$Y4KnMR|y$(+Pmm`Q%vx}Jn9sGmyYT!g#HHJ8)^mZmI)}_!X&d+gH+=nRe*L>o<*Yh@6Kj>8ba9_}EwHEgeN7Y?KykvTP zDtXW`&)14oo9&-9{W9WR(DPFnSE0jr6gPd`U+x|HSrx^;@cQbjhB-llTX8o=f@(I;VFl*NFHyuIpanE^@g41D))DurKUT z_|JpHd*YC9h7Nu~oC7E7+atso(Aw-@=-?N`IdDdg7%C3eAJHCXNaFm z=htNX_3k`?{XA3L%mKvTrSomFeaiDpunX+)_Vp#=EO5wwKnK6z`v~CV-Ex&UnP!^( z0-fUgd_TR&5o*sp*4-pd*!hYh;}~=t$6QuE@y^D!v3H5L%E3<1p?=8spngB-mxFpU}ZSI42Lz(idTf6WQUs33TA#+{pitc9zjmC12OSxVy_haJQfV zf=lC#Hy)he?h@P`g3I9U&frdh2OAs)cLo@omuGJ6z&^Kbr{DR1m^G^x*}qkFPRXfL za>2=QCK_@2sPRaa2Rf7U<9tsCzIW1iQY_+pQRR{S6S{yuGXLAZQLZwge^znEBi>`1 zazSTutnan9h@Xf!qf|ea?N7efT1h!FNyZ+<+xxBw&08Z5g#C7eBqe@yn{g64)EDtS zIQI^wBu*onaS}R{<1%uIcluppyoj?#)mP5@(D{`y*S%Xeo+<7}0p5+05yd&C^`&o}pnTcOr?Ze7)Lx;HXRfkzmw(Tkxnx{jY0J~nOlAXA@yXePf z=-?l$m%%9=BR6rft9*j&C(wa|{cUjG9LYsswf`vnf%S7( zaSHwp@2 zaE}R`gNe!$=W0DY2Rb|d#5h%vI1&6!&VRF?^&OBEBdZcWgUUlmzGv@%I1I!UutV~4 zHHaTDSZ@dDU{Ayq;5?2}hd3|B=sD1VgMI|gr6~1@6QIT)_x;b>Z?*?FB2EvRbK&R@ z?)#0y@Om3e-{;yewkh#@+Sm;`v%XH_rp$9xZb_WU%5HK#g--Sl#AUEwlDPiFn{8u1 z=->ymb8zm2v?oscS$cm$hx)}kcADb(b#prtXV?OhBkKp9SwG}CtbUy^yF2mXEirkr zKX~d_#QmuE=+C{0pUGw)7do@v)_ruZ{r!m(y^_A1av!~va+O-md25sZgbsGY`Wp56XYN?yq_F9? z(1q1+L!xo~vAy9$;{UGph2{7H9qNnt8s*GWekyV5sP}&)2Rd+YZW5e3(Pt3HZXAKm z1RsE=XtE_t<>Y^6E9G`uOj=IXX`C%{^FmJi-@0KroP?D z`~}{9g8hQ3E+bAml_!>dfDV2@zW^tC+*QQE`!tdR9sGd&EI5TPtR+qvWjE;u=y<%d zzQZ$m*GA&yRQuYJXZH>d-fu(sHZRypyg+phP4b}Ie&_z7%_VcIxZ66;IhySxUTvHB zRE~2s)i~EP^TrKciS~xByO3u>c|WJ!L;Mjo=S`r4e{ZbgIJD-2{lwW}(_f(jCyhAQ zK5xxo;*7Id2SNu9zWa`PZS8WLIM;2?Z9s?iR#xoqzVQhrPO!?0NPj}7%pSU5k9#vH zSF&tpiI-BH(~$iDI@Ay6wZIwm_5yL9+w7A-hx#p_$KSiHy7Mw|0@XMn>j#~Dw|Ig7 zkA=P8&+k0N4!B0VP4mrhNAjSP_SzxlllYEX#5=6=M$%r;!Cn|gV6S6w?-3{83jKHp z9XK(>Ij@6P9}uVb8a)R(*uAQFFS`BFC&bCVL2oDM_&a9~BfE$XX03feymTtBEA0dw z>epD<``P$6#QDeO92<1tz;3X27Ku11mz(~Vc^c@Dr`i6c z(e*-;-l*~1`i^{(#?gpd)n-1kdq*B|DC%1+eJtY5vl&OBgWV99fzx$TeB!)P>lE1@ zpaW;2$jil>o0vH1V(R-Jbl`lM%JWrl#pJ|^WAly}bSB4Xn?7cluGzpryHq@1IX!PUuiy#Kqto zsg;j7yH%b|wqNML!MFs@fGCBCledn(JkWuIaT1(Zw~G*GhRuF2bSB4XR2AnW7R@S7 zoLNoH^2qrbI&;28{@@2Mko#4Vo~4P`t%D6u=_C*R1ohnRU7mQOTkCnyZH|6l{QheHMz#~^;9taP;B51+ zL7dt)`4xD--{!4qiGjV#^>{mkv4#pYS*+|ucID>5R572=#P{e;%{_0Jf zqODB-%l-tN$+5Q6v-A5CCyhF{^xxX4%a~N^4ZWXQYH$GYFQ|P48P7o%P~O}xG_Gff zddn+F-G}&bo?4s}x>RBqai7_Yr_jmvpGT};Y9=2=yltwS(w@-4FXMz?qWn3QIJs@^ zr9uY|;$pOuDS;D-bEB``ztEW+r*TZoV>RkbB~D?Lca-xObUg3444;$y{_%yRGl=)j zX8#vD#OL`&>@Bcm1kQ{58Mr5a`uZAkh+9yN8?qfjhxrHJW8AcR?U7@-mWIZ6*dGAj z`^bFaU$nUg37u(Amr+IJb83uQM4aR5{;6!w(4qb_4&6I`YSOD>@YAPjnP|Lg`1>;A zURV1k(ofLI_cS-W3G2P4%rn*z&!zS)Wxa9U9rv1gw|rS^ zyw`no-raf+{^W-(#2r)KET{ClXZNpNhOfBa5O4Di;@=r!-n)?Hh7SIE)wvjV!lkaiMiz zy=wNi#80>*#=m@NU*1=@&aYDk{6m~?4@{06kL>1GUPqEn%qWqe#y3|n5feswJV*^ggjgg77F|wWm9XOb`!SRh0 zoj9$U=sD1V<0JB&=iOa8C^Ozg#yZ@!q9~rR_J+frIrbIQd4UX3hXT z2Rd+&Cj{q6U^?Q|9IWR+XL79LP5B!ci8Haa$@y>Njh}H_eD5T^UsmF$U#I6o=YHSZ z`o3}3O*x1kBgEv(cBp^f*!n(T%h-8{w=a=?-jMGD;@w8?+M`omneg-7M$C8c+h4u% z5&vWj{rm47INu7+ zq7Xmg%u?$N*`J^@Io9!X+nN@{Y2YyH_us}-hk<=|lyiY!8{*Hh$)`hya?Xx3o6f(z zPS~C}jnuteSx)HWeu*oc&(%-A?3W;)igIoX>O}kq&GhXMI`{?WiNIMhtQ&D^+1z)7 
z4)w*p2RLI&^&-w>oAX=Hg*6W)Tgc}DVn^sl{Gv8_6zJr6fOrvkf2`Znfy7&{@+fj% zgHHAn*cJ7!xM2wK_Sl>Yg%1AcC*J9L*<}QAuG-|+p#ul^Bv77q1;!A^U*$<;e}m4> zujjvvC(a6+ypX4U?J45O42LEY|DoE)m41Lu`k}p8pQIi)op^=Sxfoe*=wQFa!VgdS z&mvB8b>38Rpffo??wgBqEH#_WBhEy19!Ay=I&QZ=?zM^Y0v|K}PQ0=<=dGbL%jGhX z1altoS+u3hQTs1){(%njPx520|H-xPYv}xg@4=wm9XP*&xQSHzmGy;A`V;wG@SX&& zAzl`nbGgvLpSaHiPU40eh|@;BuO$5m9XQd8@V-cjv|EVdusMeU9XJ~r@pqNK#@Ruf zy*B4Qpffp6#wir$Ea*_q_i2q5QKaqZbE^7Z@zdPX!o&R62TZK&@j(7`Xr6M$3rxDoZ&{AazkdJc5(3+_LFGi_)@;_OxX zrn0|57uNU%|AC*QR8->cQ*o{2L+74v{oe7>2r-EtL(T8f?{?oi#(p8n+b2<6;+?m- z2L&DcgZsPS6#g>-ah7${`v*Gs2k&`+)3#tz;xtkB!KMA63(J1h#rJadyiY-V@4{v| zrTw6j<6-h(?l->6QxorKDLoH5XUSRrzMlv?Vts@95384scrDdBSjJt@frGdJoHTJV z5hq%6y+5EcIZk7ZcrRzq@vOv&sLokQdqHQ8_dm~-;r<57HEu&r;yqI5sw59O$;0_D z@NRbYCZ4}d`JkKE`b?>x_qv_NK9LXVnxg>m_V>~I6S}bGw*}99h?hpq1Ja*zeuH07 z&ogU%iIY^Vzaxp?~aI%l&px$tuJ-W^*11I&cur!Jg^AIEk~PqrRU(2fxf0->13| zQj0hRY{ot4!tx8gGlFvdJ;g=*wB5~eO20too^Oo{gPjeDzg)eyE&H`yT!`@uuLB+YgZ%|?a_wtPoUH%T`A9zl^B4GIhqojCTy@?@ z`UkqO{PRZKpKjB<6Y;OxtOKBv;~Mt=P~Pj_-H4Y<-J_KK5IXn==g7fHouLccpd8^_#=1i--(mIu72Kv4t~ulzO#@p^HSop zvDsIF&d#rOUalaHue!hR-~8$@l8SewckWq3{7*LbETBVuvF{DLz3jDtIH_#x4;?s& z%fNY&XA5!W+nl?F4jkNL0>^l{ojASJ`-8GQKxcA(e5d3%zdsS-;%?$hQ{%8~r_ga+ z{^OnTqns~~H)|j94y*ny^9j%)U;a8%u`_Fz`R+H%Y0VE-a~&dXU7Ng$%n#O8<6W~g z`5s4dZrBHV;`|ou*(%vF;%B#sv!MeA@dG&FKAj{^Hk*3|(4oGFyTN&P`807x*~A6V zxsMC1MK7QdP;y<&wM-QEhJ23vh9&2a+NxZr?^AU9L z58l%OCtt^##K~!smx2zQHe!FNdd9oN38=3hN1-!0PNT0_XJ5T>pEwv-WdDPX`=8TD z)P(K6W9DPxB~|gh=S`g?9vC~Y*2X#IgUaHKjsqWXAV#QLYyGgpQN3j z<_G{jAuFChxc@a9qcNvuf#$>4ZxW?T>q46E^Z>aaz6cLG^U*+>< zy`h8sF#ds)>P{5mjIhb$LuYcV<5AwbF^JRZf65pv>$Y^ALb=+7DOsboZ0Gph~z*A z`(d96oGLFJ#EGr;W2F6{3(I~^alWSIjmpHIZgVaSI=*M+G)9a4;8VXliT73Qr^#^v zIvE$huBd-!zgom|*yIVIgFkSt2%N57F5(Osp>NO7nH;BqdV>?^ZUf@PQSpVWA9QRl z>$|bPFKt4+kU+Cs(!ZX)8*6iX_E+f4a=DDd z;{BI^xjl)~N5xNa{(%njPt!ks4@eRIs2Z32jAl)FoclJZFL5iYb8xbqK!>;w=l6D?aAGGKLY%BN_f(++2j_vod6akraq3Ri zw-e}0j-P>X5S+T#Mia+h?cYheLFaz_w8jNK|BNU8VzmyEd>I#%Rs)~w;enBT&Mn>t zJD~rd{40Vd6F-;Dx)eHaFphy!aoBX?tWtSL>0ju;@eAblTPGEnMV!$#`()6WoF94B z_WZ8FZ|~<4XK6Rnzq0>A$K$AVoJ;@5LgK}=*$0&49QKP~rOyYFu1*sM>XLpzR^iPu5ZvhO2K$O3&k zhEA@7>WX>DH_aj99kj_eLWg#P^(pE%Gt)8ReEFa56ZjeBMINGRp&;THROd-#yMzw& zQFJx#t(>?$&XE;uY{$LSwa*ZLUupgLFULLnh8-5BQ{UpxAOT-CCrtgQI z^%HA9e>ve#;`?RR^F8b5*6}e`vzx^4=Bu}Z93OG+751!?;4X0r7S(g0L;tHN`f4{@M*{KyRQ}YR{I`KqnbFEG=0@u;!d{7D?+EbodJJfKM?g! zJ^lmn>W|a=13GZ9e-2Jk=NIC*hUz)c!LPe6M}D2H+t2q#3*OC4cUU}BP{ESs%JWRbM3i0zPe@H%b7!S)T$F#Y(`SkD& z<3q)Smb|)OTx& zB*fpV;wNcO=-kKc*7r(IUQJH?`>Oo1ed*snaTwS?KzZAqO+~!^HtildJO5;!oR&D( zY~seS{L`jNM&i7+IX~u^{jBeoERB(s`2AFUrGGrzhqUg`H-Dak_@8aYBe_2h`@%2p zH|8Nu2b*{vI{4+RcsD6_xBSEzWpi%~I`kVEmz~S+L!7ZT`8?>tYS(RQ@;l9gXnIeS)GTI<$(^I8F#j9(~!_5B@*|J=qe(7~SA z7Xl~uq^`s{YO^m79qftm3!D`GJ&BXf=G+Q&Vc8S&7WidL^(DT)+Q*Rggw8$Rx_+t^ zX&~_vsyvAtALRN8_x@4NH%A5&C#H=(p@U!W-2iaDtR7CBvNrom(AoK=Q0LLanP_7- z&-~&v#))^Y9p2-K|J3Gw5_B?8pHaMH>ivEa@!Hvpuh7AM7=K{D7rUkr=bO!V106W` z#Cx%?0%j5?iOqNe9XPmOj`Cc0%q7lfHQvbf1synPMgC}8_JzcWyh^|Rg3jc)j2l6G zUTLurW=rHdW>38Eom*7FR`{<9Nt{`q|59g!CYTPSkI44f1xuZDhUrF5O z{d9FT@r&5R;n0DDc^&>}xNbdh-r4MjKxcBC#{PzUPyPD1&BTeI#tpg7fKHzO!FOL# zt}mUp6Yr%>UJp9=?!CK= zT4EpZ$ni78&-KFO%l-!)`rq_DZO(P6eqQwtr%_4djheN&K-{x2^!|lT`WN#F{1PF~ zW#SdhpdW9bgJ0TO-Nug2#z9nl{UzqivYzw|qFhzD;SUo)~@m*t`HAokB;msNWo z6L+eg-cQiMPcomrxYu*yWVJb81ReZ@_yOf9n(sAnTG)(Z(7Csx!&o$x-#Z$f^BwU! 
z_t)1KI@C9nxF^se%}3(ov^fU_9XR+N7V2B$(--1&sjDv!bl^C}x;fKEBgU_JQJztiv%al^Z{ z`H9zC#qE*@9rtI4(MrVWqr3PJ?;o4>D|GM&@(ifwl>ENLSz(h$gbx0|d#>QL{ak`L zX>8s}g3kST=x4O`;_pbTK3InM>ld5tM*15%_k8Pn*Jai_h(BnB$(R1Id+!?K66|rK zPi5jwoUL!?(7``=w*{PjC7i^WYLlOZ4xG5+`;nca)FRHdMf&nU2R~!~0p(eJrY>ygY~@sP|avZJm)ffxu7#SE@Q|s=A;Vh zNSv0ceaW~0I>ZHypVc%vAG{U1PAVbByIPOB61SBapJjcalj9xY1=ugg{+`4OZxfF~ z2fGD?Uqkx~pBDBdj>G0&J9H+;&pB|Wn z{DOUUlyk?Z@x+NYK+k~=^~L@HIHw$wiE~WN+p@mUx%Z3pz4OvBrxV|4v+v;9JZGJU zqMn;c{6y+JyX-f1^APs6VUJs<=Mv9*uHLTD!9SRnz}dWLA#w63yUO?vI+NoxvNhm! z&F7X&h%-#>%gOpd$K!%E4jfr>1@R85{Q&82&*H!z>)`mDPY8;*hWLeT))Uade#o=H zevOZ=Cr)A&|4I8n2M*3*f)luJ7je$poa=@ToLAyIpRwNVB~FYg`aB+V;NZR{%Cmds zLE=P8qvt?pa{P>%BED-<`Y3TK6)-u{kI=dAw+>^6c;{%!_7lW!T|r+?=ulsr=SF=8 z=RQT8#=Z3%=)i$r!CAHCEODMM)pMW&2kTaFd>4lh=hi7b2Rhgv>r8MiwYfr^Mf*&S zv_Ew2?QcErQXuFTJXjy1~nbDth|hkeFB zyhr?kyYzO34*tS#aE2#-M4S$Ro^Y(=-pSve5~peheSJM0_wZgY%9%RtOX825q%S9Q zW_?}81Ce)naQY2#+9X>J#p)+{R26VK*#fl z_59h}Pala__o=?VJil4LBi!!DSK^$Ds-JK99*{L()U|!Mn7`&JFR6Wo|JonX4&dMH z@gotxa2m6mk`JBx{_Zk%#O8Uk%-*QP?_AL2OTR-0zaLJsYS{JuZ$tA?e~A6!?;T?j zw|+&veW61?M0|t#4^JPLIA`kXInddSo4Ic!AkJr%-_{OUdQ z<%ACPZ7uTf-G`(gPQ3s<2Rd-ZiG8MyWl|I8RyREdI&iQ*3HwLNn2tEL)q5H;9)b>> z#dWgM`%-s5W+cvab>3WZpffqv{+94+R^oWKHS6`?{OB?&itlE|`#mS|A2&1kvOhqF z{?O=q=Xt(SZiKG0@oqirSnZNGaUJUYBsmU3C&xjY&j9aGlY+!6;iLC2bnpw}3vgD& zDMFl;HsdLD@C(*W;7ngrj5yi9=;t};+>cXMzkjV#iunH|{$KoVjZ1tJl_P$yTzbBY zOMH~0hAjCw;>C_(q3dtt#bKY*2PzQ1RzAJIpo71VCjlpR=PJb6Urx_~4*tUY0M3RI z)rm9CrLQk^VfhR3AowLe)FS?H8$NXI`POsoyVkpi-%jOSWPh?d*N%HTC~v{G4VkCT zgUIm-I`{|g8i3QGlpk>-sdoV+2Rb|d)H7NTC#x!t9Opds&yo}T?pdont%)CGvo8&u zd;d6$+DSOBsJx;b@$cL0H$n${V*dj6Y}la_b8PZ}(1C;fWpKvj=ti8qHt)qjXL6jz zU*f&0zR!CSr=EH@L$*8U%;DkZJ*;eVIImk`eP808Re37OgD%dD{fFYDE}`&TSu? zF^%{~ZSo1wp}sh8i~9D=IFmSIkL%Y#(1BAfD~3=b$tF zZT(*P!T7s~lQ)(bhsbyv-z(preEPrN5h!!IU(JRQAFA(_JB$kAd~m?wJ;Z&NNgpRc zhw{Y{`);2*A0W;FoAohtSPw*SMcY5Uagd4!t@k483_U{JK5Bg|#~FDq0_TG_EsT5Z z%BAh0@n#m$uRAn5LHvo?^!0}h{Tl0N^y@-NPZ6g_dOZg^^lQ8e1WvhwXNi-<+vLc0 z3?09N;b*)O@ym?PA;gQT;$Ue%=-kIIKi+c?`=Xc9T_Ju*U440>ll=~PBh~;iQv>ce~UN=s_8kK)j?^xl-zQ47+=u_e^P z<2|AgC-oq`AE7fjenuqmeT&asVi6~?S}(|c2A%tU;Ah+y{mfM~9`W%`z2rj|7JsR@ z@4w?yLgIf@`9H~r4(Afes{yfqciWaeGwX-WQyAai_X3%c5g+em%Qyl$_zV39oPkk( zBaWBNd12__FRY)zX|czPIQ1*($1Ui>vM1t7@Dp@NPyARm<1TdW`A(y~cz3!$mdwPD zsPf!$+<;EbTbRdD-j07|Bi<&P{4aFyPl9cH{$$dkT*TR7)1RP&f3VI%c}_ITOPrE6 z_bZ`uZ$ImLp+<&+#6O_+?PYuS%s&pJj>zx7j#q^E-)8E^73g43ydMlZG`w1jILXH9 zInaTFegsaRxuuA+SLGkV>VJrv!CB!~jyRjtc^Juo4&{j|;`V$QD-tJ}%{fZwOpeRg zFXHx)=T(SvR-L1i^AvQ5+r!rnS@(3-8#P`zjd~(3d~>Nfar-Pc?JoNlbh3XTE`#0L z?5a(?*f#kO=wLU@JK&UGT#q>8ZTtwG$+7NN_iodOI6YOIAltvE;~&<&C}*>*O^NTc z>A%p0)qjzX2fx_!7R0}&&Rfg!Lx=gQf^t-|XW<_PO@U!n9H+6+$#LDaVx5VzWwE|JLdWg#$NBd|G5|OiYj|ae?SL+BvS<)Hltd+S+U0KgB>x?z>X&m^do-ZIr?!BI&f}? 
z@1prk7(|?E>Kv@}2XrRKI)4lqI+QqF)VL$->)HHa%{P7NK9cy=Y}&VH{l{rIVsO5< zX_>LakFWNh<@g32wwG@PceP_gAV0Fo&%f+K}(49GONB^&~bjm zVSL-f`wIIKtR&vBsQP>?bSPIMG2cy`xP~|*FU9)TB(gm~C))$|AyH4qtM$Yi{N3cq zeg+-d!#J_dD_mzYaa=z7`ax%M{EQx{`Myf^6WfRrr;9#+3!VG<$Z3?!!sBzvO1p?3 zb&)=Q3!Qv7Gjnz31z+Anye}v9^@a|9#rb{sAw$Ol#ObE;x6-fBNx#ZC@?zy9#M>KF z-w&XJUlG@!TshwyC(g}CdJc5(E5=iBT+@SzlQgBt`LE+J;vMkQ1)e2-T~%Mnhc2vk z(=0NN!*@!A5P#=8eY=59`Um%$QQmR)E)#EfG`(G+gMYB!4Ni@b*NC&qOV5E0oOj}0 zOo}|Wi1SM2)#W$<9XPqfdZA8|d&G%XSzjLLOpeQ#B-RVY=?BCKC~0!!dI37D7ydc2 z_FT;3V^v9jA-&w888*#3`Lr&w&mce76wg8My5oaX#AYyFh1hoW|I?d|vEh!H>kbr@kX5 z$0z9go)!4_yDcbJMA1$P8efTb>43g|(3u>UG4uw{s{>aUv471|<~V9{WIKTl?S%WM z_59I+?h%Rm=$gru?L@9yupe}~eI{p;Xg~LZu>OEw+NX+2{8Tsf^@k32#JMkU*1wKH zoCWvw9Oz7rHBK%%KMrwx|I*hN<9b-*XZ=Z;>Ad0CiV28+?;n#d+YfZNhqd}wkMd&Q z4fUS+IWh5C#)$i`OQnCHLqD#e9G~U&KgEtrTO8VtF%N=Y<4SVkk4kCs<#-4k{DXNA z9G}Ulh?6(5o&%l9aT-}W@VoVe9chX4C9TPk_JdBwqj(n|<$4n?Bk{iH)|U%9J3l`JNok+o7CM#^xaY)pGiBLI*$KTnRYmTjwE8#fo|k zbnruF@m;ma+42+TWNDKl{Qw=W$F1))4N2rfya%dWlBa*C$!RPX>x1B{MTs|3`9t!c z<8jboU|kNo^_pFRc;037c7qP~N1PAN#ujCWv$wLI106WU#r@9GjU2=&3EksnDgr$?^PrIl#(W2`T@G*31|O%jso=#FV@>Vm)0bnuTA@f z4jjBU20v_=QkOW%Yv}C<9qfnvJvgOY4Tw`%^#f@?=-kg2*1XEBf=!5@v4Y+|p5;{> zMh%ho_y63C_Ly=~H_(~mKJsYReTO6A#}KbeEPZ)BTTeTTvZDVz%{+nl-Zt?sbg*kov7YNNeF|}I zZPmxE&>;@2r6#`C&sHA!``#E82U_3PkM?ys@#ERNS0LZlZ>ae7M-K98m1I?DoQd@s z+C{q2vxwhtwLX4_4($~C5a4V(H;*{E&*?ePfrIQ>^EDX-^12M*c`IK{&4B+d+*^BB;X9P7A{a{C{| z$=cj3kF=YoUV!YoY4bK`^*01**e79U;oMX zg!oTw&bxWmU!BIC3cSuqo#qAcv#R}1=^yB}JIuT~?27yr{PX9-*TkDLMBh)K0|(!0 z0%!Nicf^^b>Mi{Y9sGm!3ON1SeI!mdb&f;&2Riq0o5QFj-Ua@U`z!HJ*qqaW4)w)- zW0W&6TevvC=0P{u%-hg`gYh1m7=K43PR8MS`$Gp#Gm*bexiu z{mwkZ|5icYKcIvCG6_ziGx>;VXUrG- z0|)tfa0W&$N1WNy^c?8GG4}I5)aeZsh|_Gpo&%l9aTR=5)@=fYs8a0Pb}??ZKcxu7%4WqmKW{*yh#dE;wx zWIyrrz2IQ6-}EN%0P(+6(w7rDloRK}P~Y@Zj}RxIk-nVJAuh_SI>y^MyOS-tzALo- z?iAu`Pc}D~~_!;{M;4J_CfHW$(QXNI-Zvu2J&$z@7H}_i8pYw z$&-GA4*th_8JsGU!o~eHKeii&J>9_#}!BB1EIrt&tXUUwv994 zRWYw} zq~CGw4fbgNFah!Er_#sK(82Es#W{>hladgpU4A_WI+Nox{&*DqeYS3ff%2>@mV!8j zdY?ts54ujx+PCcB?OhYR4PNGXWbjJYP~J{FLWly zn(xT?IuCII|Hu6<#04m4gN^x#A5-m*%6D?y%ld?-f=WX1NL;2s9DMtJpHt&K!2Y+Fm08Sp?QpD+yT<>@2Opep&T!QoS<;FUQ zGgRdXWV?p0#L!>!B3A!LOjMP45i9D)Sx@s_c$W?JZhNs7@ptdm-)Dmk^}ZnTEqTYg zh*RZD%zr&7>kS?EXQz=u>|@uy+<`S#D@^Ye%IcP4JMnfmfUXL79h;M*6w5oh*ClOy}TXK{kdC@0=K z9`UUw@v}BG{VMAZ9pW3mvQctP`^ZQ0k0MS| zwZ4=69y-%bKhIHKkHhC9HXa&FyqRnD;~jJdBAVY_f?pqI7)|>zxX%LnRctVk_~&ib zE6`y+%B>u{Yr*%{{-b{0XT!V#e(*n2h(G{5`pB5ndrh97;Sl@9QHw7i-f;D9r|i$rg%vNL-=p5gA1)&P zxh;DCL+74v9WQ!!SVsIis{CP%7Z>JT{Pdw{pzS@$+__f~_kzmrO1sE>7S7eax)i$zq;FTyppOe*WBoyVZ}mMl+5YX`*}Es^?amwb5ii>Bdiy~K z`(ZqW{SwzZM4VhZ^&IG6Kg4<9wEBFMIN8sb9BDu3+}qE(4qvwDB=PG!((^rAhdYes z;#~XOv8Rb2@=b3C=wQ$3qJKmRI8U6GZ}c4Kz`?!)>{-9;CE`@Mt>-`o4&H?Vr{}Ay z#L4kM&w&n{q9T9lN^z4oeLm?q(3u>U(OujRUvTRValBPKk>eh8m`__REA@PMy?<1m z-FiQ~!;bsJU3JkcpKO0}+`~Rp%o#JLF8pVk;yVo7^M)OL*FGk`Z-~DB(4qdr&+|N3 zdHi$Yq(5kKWc{H-{o`-`^5x6cgQ4}uyo2&(@qJC)RJ-)`hc2xChkY6FUANy7e^QX1 z51o6y(?Glm{^5}yh+kLTW0vD1bTZDxcTK_TR`d(;0&UtWbZGa8W5DT@!ie{4{&ROU z^FEDSXF&(f3~@gG{+9^ENf}=3(pd2z~RXNv?6h}kj2iOJnycQ`g@waa``C-`;;~Y4NLlO|@{YE_pIy-yj`8^47 zvToMb*He3QpaTbS3pj(m zIPYh96X%G{_j#d1`$PN#&c#9nh?7vA|CaL>bne@S!`R=J-=WLZqcHKm+V~MVvz*rV ziZhlgMx5}2O}oi1p23{081ZLj7M{D@okX>RmHA?n5WXeZ*1V#p_a* zcs&;C{RPq}OAM}2M z4*thH0M4l6J&2P$k-i^7hyH_oB5*2n?n9iQ941HF6FT?(#~PQ-PC0=1;Y#cIp2ZPP zzIrBr_-AXIeAz#tll>F>y|BmE?Zb%YusIhD9sGlP$lz=rHi|es>goLh9XL1- z3XY@0IN~(0xvvQwI9_jq^Jo8Mzj) zf164CgsPmfJwk{67kZB9;K%0QKK?w9k98;bd%Wfne~q`^4$#4#$lHN4DD6VxL`bdY zKnKoq@x7)8pBEG7oz1yr=)j3B-jTV#XgP8EMbMWAI+Noxc8dAVaI7ZIH}y`N>`&0i 
z{)GKG*eNLPdg9&6r7stBs2}D(aQg1wM4Y~P^&IHH!Ml9mtnavuIK|57IncpQ^~AnP zzcjmu6Vc`#FLb7ze$Ja1?@+E?pY{-Mff{#YyN52UaTxD6fL~+l0pe$AYSvrwp+mo_ ztQ>s1+m)6ZK2HzL1L2+t_^~@4A^w2&db>gg|6qOsXJXb9#JON|E(1D~W8K%>_9U1% z+10rNSzp92Vc8RZhjPAOeU|u})cg4Vjqfxjh<+H{DTMel)w)jhL+E5b{2=nLo$6g7 zUY^-{|3C--To;@PMXwX*;y67AI`{|i80`1%`z_)uw($>iVfhDt2mZYS_lRG0jJ~}? z=bmrPi(gpwkoZ?8>+K-(;#l{hoWpuOCC&$RuU6)Lpo3o!H-PhJk(b1oYQurfY6eoSLi4JKx4?hXG!gMn*xcKP4t_zt44iZ&lMyGwM7>|2 zLwzy6fRic8Z^SvN`l+lhbngA)G+H*xP51r+gS?1ebi7$k*?*yv{kL8;J_i~!F+K4H z+T<6ZgZ;*feeIB1nThj4<@01egAN?z8)3h+<+Bs#)&jjBpaTc*O@MPaPHy7lu<<8! zCdWEn4vUbFI6gM>w0^vF8gb+Ez0KRh3lT4oddETf4?3?#zn)KY8hAGb^&EYsDDlqa z*S81gWE?SE_%Zlf3F2M(uFqdU2m8YxC|8mFWr$PvqMiet$+6~xZVYn}=hSt5zQ)u3 zg!g7q&aJ5{6aVsElP~=N9p59feph_X?rOwqqrPJ+d3N6wN8F3@&1+nfc*m0I`vY|F z2l5x-yev|eIEixVInaUAcp>lO=6_tDIJYwCInbFLr*T!}B@0e(Oq}mN`gRMQ*>17E zau^vl^Lqx-%Qqw5XPfgd(4kz|SB0I@er-vd`wjKwf=>3QJmOsZpjd5*H?ytYUeLkr zhzC)wfD;{vQ`e?`(3u?Ty6MB*F2o5@>smQKVLjx2ezJc5I#tac#BZwdb&~H{{>U1a zPmkV*_>r6I?J47Ov}@S2_tE~uiEi`G0CeyR`~=R%VFAS1*jHa3=-?OJV+N;PnPJ57 zR`IX28+7jdV!bc>$$J#>@2hp49GCR>Wt~P(v3||{VJz{Is(TTV2c7IE$P=LcPj*Zs z-ZQmcl<@;}@CV{La7J&KN}SF%-?f9zZ?43tRHmp9T|KF0Od+H zat`tSvdKF`XO_!l+&jYK+?y^7h?BCh+1}(j3Az@_@aeo_uWvg4uFCJYj5O6a4*WCk zV&WF7uIED6L~)NFd){R7hkc=WBYd|5^$tj}oVcr9`u+hO{EGDtICDZ)5vR6#|5COO z=uD0^p13f29dVkfyuIvap2ZVZ{-BbZh(D&aS>ON4$2n-&q5GGu#P6uyDV2Qau%5zw zo;(kpFPc#9=XtMkBK}LYYbWuSsrRjf)?`6+(H6U47#W52NGr!Pl?iSuE&zTe3C z33+z-A^Y*O#3^Cp2k793xBdBDtp<}qh_kwjSsv*J=y=`fXEYan@Tqczc<*}X{QzB9 ze!%xmL+hRVI`Px1^A)lkLx*;Z{0%rcKHefuM73U%9O%Hodj#OTzkQE5du`$b=)jpG z&eQc?_>efoI_cX5bnqv>4~Ft=X!w*kQEcWn==i&0F5^K@o4XtMx6ZWypHTo(3#UM);~@or+7E5*~st-e$7u#T&nM%(D}3)Rrb^K zIHC6`aQ_qKdr>_y@qBFNG3YSQ)K+mpZCBluH;&E-&D-I;1Nc>AMkjtzn{^R%@W(ij zmmTvk7I8+ZaYXh{=uD1vJZf+_9&xg%^?;mLJsXd#alydBiHILt)mQR8jXSVE4trkp zO-B3#HgN%T@K<)_ulj52)p$JOkI*=7mUz$Q?dO!lpQ7S@X=mu*FU(IU=f2`;h!aoE zzmfwTI5=MqPPRE2h*PwdejJ3(UQf{9_?tS zxKA^rK@Q?oj$!hopP_R

f&r6){Hs2SA4xC$} zUwy4zkT^Y6yOaKe&g3`^$MisoXOpHYN}T9UeSe2e=25V2Lj6vC|BHC9s_Oj;oqNAp z`$MxArHLP>k|+Ja`hAl71IshV=Dkq)eG z3p&hm_?~*d>LnHrU3b96k1F1q@aj;F_;GB`M?wd_>m87(S-rfY?^As1JNKLN)FS@P z(`Fne+t(D;F40c04uBmp-mF8M<`?uF=wQ!~V|+d>aB2hMM15j%cf zH75Sh5BhRKhjJpHgZjQq+=4g}x9ZCYo%?cHznk8EQyb!Eu(>xOzndOGP1J3CM?UQR z>69ye!g>MaKTy8|@vGY8Z=i$UFnGJ6FNJ4`giC~oU3YoLi*iPd*VDV z%Go=||AH_m}m3x|{+35P#cl(+)D;uzQ~l?^>h0UR?$duanA~$bJbO{DbqP z;C#(Kgg6_n>iq+q$#EKZXAYc=e~utdWc8kmtRHk{Geln2VeGxj-`yE-*N=D&ZtKeh z9qNbkj40RC2;+$}-NsJPp?*t5JUo8)B;t6l*ZU7T84n}Bf^yCAol3kDHs5uD4)sG` z4xDM>W)kQA6}_FHGdb3LS;Xseh_n8c$&vlbvwYdl_AmA;i;S92{B$q$_JdC51Gb6t zYt5Q0Cf?d_dLDGrADFLT$Nh1Z6K~Hay&a*0Kd{dQPU{1!h!bG*J`i;92gWUMg8p7d zoI&bc8R-w`+>b}T#tN~I`O13}@dLf|@fmdP`A(z1@XyIg+lZgEfIgmtj{V~_l8FAd zs>m+l#g3!rLB~AnzL`&keZ-5SzEdFWEBDRnDsb=R-gUgYb$7*Oh~MD9zQqm^KhJx8 ze}WEi8S)#+)5XZt^TV(F2F3yKhh95I{MzUB?G-w-*9zkN>xYJ?h*L-9pXEFP9XNNz zI;3{AbHpjwf7+^nSuYtE zLdWyI!>B6WquVn#0`clAKgjw)hjQV(ChFJ48HG5V^6Se59nU8Y1LG=q_kTnu-flJC z$Z|mkyJH*!XWjbP#JO2V-wvQNIX=eFbG)w_Z$fZRym(`aO zI?TUWm7^P7Om;D-*%@UAhvhxok8jfvf3!`1#e2B$_tFjQrxuGd5odtSIZfzHj&*#> zP%kTS<~G*X*R%1-dhX<6!5qYosm?|ITfbu*gTFR>%uW1ZHuC^dOfo#-kEyJlfFk(ybQt z*1N_d#6#eZtXiM=F(2vA*FlGNkGvZ=cgp$_C-OTz2RgL-H>>zuUACu9iQ|1wf36NX zIltBl;C_^ISWDuS{j4t+bf{mhy&37eUqj!v#3>M8->;!V{nCl`Xzhp{iPJKcz8ym+ z>xcLLVD~0#x)9GNy}n$~p?=se1!r*G9>lq9bAAsxa8O@xK1J?LoWE@Hw$Q;&IL`sj zy*+)2^Hkj*ko_Gxj{mIZK$iC&NW2m@=icqkf#5qEDBqd; z+!OE1Ct65@}ri36c?KY#lg{|J9o9J+$|QB|BK%MTs=WwtuQ@Edad zb+wr*UH-!PP}uoI@ioNX(pBGHp@YA0&l{Yt-`5jonoYZh&g3|aA>tj9PP;b~C-w+^ ze}s<5FRTAI4%kk-*EaE+Xa08@z9)E|4D7s{_;si0%M0DoPSfc#H+6A zFWWtI@CV{+aGrlTM4XH^-x-7s9DMHtoLYO15htOI3 z{3}!FG4b2jtdHdQS4quR#TQMQv#-#BVz4vrEuj3t;hzz|g4&0c{S7*BFz*sz9P53XJk6sMC+bSmFS5R#o&U1FKX$=8Hu1gI==q-ZbJ!hrxS2dY z@i(s4^Pz*kimC=1Jg{}&_R;XJqV?VJW$_aee}rmhvVXP{=c-_Ll(Xi|WW>om+AOE! 
[base85-encoded Git binary-patch data for a binary file follows here; the encoded bytes are not human-readable and are omitted.]
zpH}wgbv1~8(xERWbZC!Q-=jTtFI}5B%iHNW(1C+}R&XAds85^|HhDVeOpeoNCH8H; zC2T~Tgf{25q2qYhy57vUsVVVJtMl*D5ALlut^4q$YqcbP#gJ7<;aoFU!~fxrHE)1J7c)VM3>Rp{Wao7H%~Cw5?G;v7`# zAKAa5<9OEkorVoXx)blpR(*TqJch$S{v7q&`KT9hywv+RvVPE^UE@2$;KVuKk2n$U z=*t70$+5m`9e&gx;tW^gwQPs(y=(0>l8Em=uJ~gZ@t3G_Nf-bV(1$%Q$@&zwch@;leD5CiyI%P)ao689xw5@L zC*!o+p9hz^m+Id*Ex+hz8NM7N{`il2|3GK@$NK(Am)oa^lPI>?UgUTz-ydn$rueFh ze*eBdf_9JkPds~$xa}hROn(V9@O^3UC!D)T{H@ndz@ z`wu$ze1|bWydyg@%T3~6vdOPNhyI24w^7bFH~uEhH=F%D=wzIP{0(@mSKKGw88Rso*p3^E(nl!tW1cX_C4IY-<3@Gm z*fM91%$sy(rz`J+coz0-{4z4}rw=vn#Yw+F2ftvx0Ozk}(TUT?h6A0+vBs~H562=- zlr8#ufS$##)^&a39A3mP@KVqBG;XRa&ZETLoPhXav*_C=bg(DlN7%DQGjHNtsjlZh z2M*r11gAmuQI7YXo`-$w0Xp~dqBYJg z)4eG12j|xF5pRUYFC+42Ei?KNzeh#Cc@XZ=f?dPNS?ie^;%%KXG=ebAqy6Lnqgvurtb4I!0CE4OV$H z>3`_l`^S2|uh{YG#LqZH-%jQEKCJ6e&iUhN6KDBoJqJ4Y0r_!o9uBTYoVzyXlAwbh zaIOoS`{f!D=fVK9Jkk%)$@l~3mB9O>NK@iP7;5rl`~ls-^6H#ZuOa>G_XysD`XX)u z_vn`v#I4*#Z#U>rUq5lq%x`BK;^gk5uP=0RJl!VVRq8soJ@Mw*vnHnxr*TD1@!iJJk$V&WzD*t#I@Gt7h)>oZ2_nvt zfqH*J2M+Qouv?;01BsKs<~|{G;J}XH+^sm2IL&R|D}m0={(a((Bu;d7-r>L5-x_bM zKR1T>`_#QK$@etg!21TUL%{6u#9zKh-#?&}{R43Wc-6*DCZ5ygydQM%k31)HrS^2< zthG7M3>`R#-%y_TDQ6R>g3UQ7=-_9Z{{tsQ?D@orr1D475731--(Y_Y{66P{iQnF4 ze+fF&cdCf@R?JvRoJBVK!O+=prZrkgoQO7dV~*3nx)t@>lVL4!2HWJ_pyT<*X%rIo zHR>eZNW42X=U1SUyr@lhAK;gpn~4`&PUCY$j^AS~*-pHk z>YRz>K^I$fYP7PnH=l9@74JdagM3`e{wwXbl&$8^M0F}*NFezCeDEl^~Jsb z?AAHSE#hpqiF2R>=ZMHNSGahGID-f2?GGI|@CV8>ck5%~q_m00p#ul`1#pZF&xw<3 zjXr+`oyl<;pRWEw=ke}!d_^2%rpb}x1$4LO@p(L~Z&0r2pWhNMc%eSe1YLOjDVw-A zUuon=;&(WxuQznCAM&&)=i*fV5U1TkJqJ2)aDEaT#}1?Lk9o?x(e&*CI&c#2;Jk!y z!^q4@r{_QiPGrG35H&h+-WSqypffoEM(iV8o_Ski5vQQKuPWD7&>^o|K0?A@RS|(AOV2*b)5=oX-uD z5GP%EJqJ2)eiz@vz7sD6amL&D13Hsqjl&Ctq$bWObskCD&9iwb(7<^~)OSa>bi|KX zPwyA#kPq@!0YuvFje_5t`S<;h_v<+>DU>x6@n^Nx+Y>t2(_746gYWqgCteRd2Rf7E zG^&VvP{66|#F?wcb2;uo$N3Mwj8}*_&DFhnIp0GE4(>ytJWb9QC62GneR}9jjy112Vul}amaFxFY{#C> z4_5x-hGmGKM6DbC8{c6R6?ug5863oqv{3IC=wMIWdxbs6cvoW1QauMca4_D0Q{Yxr z;sj0AbD%Rh)^$>tS54x?m}GKf`?Xsq?U};;y~cw>$V&x0ed0B2s^?*y zG&5R2r`wwsy4Fb-#5$=&i^jyur|#QY>Z^2eorLcuzz;JkHzVG-ZhAjJ2Y=!`GdOK( zv?9(On{g64`14H=-?Lcl^&4>pR@K`JIyt^&sKxhcA|LEPyoq)7Jm{poo+jpX@8N!3 zh_}qs4?Z?|KvGb$`A8pffqv@n_VQe#H4Y(&YTN z@yB`|<6H0`;&)Yfc**zdJcjlApd)$@BmR7~?vnii-v?dZiN6nu_cvhIVs%ClFT^JA z3LX4&QG6fhMv`&FiD&tI1L{xAWn#~my92v zL;Yrnb0=#zE+)?K9QyGCI-U={zbhlwIXeTF6Ypa#z1^WR%N1a3^L|VBF{<`oO`PMU zO^#gWK)1f4+CK`bcemHA4Ql_$dT;P%j`hU#_A$A#pUQiKRg`1T_x>%WW6%!QI_Hzv z2a0m}58|iEsjokD@Gr(e_&470t;A_oM$dr`{v9mxT>S%g66b7flOx+NbbOD~y54D* zWH0evRMXqfv-OVkU7|53|0I68#`=28cZtx>VZSKD4-w}=J3R+F_yKVMI6X@qBTglo zaRfS(W1WBc?LS4Fl{R^~{~BNS*msurSyaE1?b=iR^VIxqMVcfRiJvuv-tW+bmp?*Y z4|a}T_$u+6-qz=Rpo70K?}Ib2@J-@e-Kpn52TnV&o^A8>Z{n<2qUS(oa-2q5Up`;_ ze)N6f9A0H|WdDQiWdw5sN4Ww-{`Fwe$HaSbN?$JM!0{8DitC;cXNx*_Cg%_6OirM& zR-9KU>wHO^r(XJY0Uh!anbmk2I%9aZ#7}m)##8tO{yhBt4e?iI*7sxRP~X{Nz5VLw z2jbiB_VNURo2@HI*x;!#v0+*B{7l^Z=~A4l6HsA``+`p z>#uHft&0kg>w;qmd*Y>bnsU>v0pYdb3Wn}pQZN~bnw?a z@%~G;7X^vaLak$@J)!%*`s>?I)q>_&y;xLw>76oUN+8$nro3j$EJT%vql}(`@<~ zbg)16(@~y^ZyOP3r|M_2{X*y7{!U}Mcu%Lp<)*}6V{?86I-U=l#^294PDmNtl6bi9 zBj+pVm}h;@C`aqI#Cu{h&d76Glh<&*x}CEFajL0$O20y9`oo$>f10EVae|azW&P~( z=nIxCr~HK1_3p&WG*cfhKxf(u_jR1cuMOG$Ll^ZXUi|rb9&~-vO!zSVKE9_DVE9yG zUbD9Si08l9?HEr%qL1+5o`+K+|{^*!w z7;)~b(w7f9>|-~NFl}?(uz&CMVf;e*Vn-T9+~aCIk^K(3@cP|)ac|l_{2yVhntf)3*$)={wM zqck&!6B=xmPxe3PFdib$0M68hvxzfrnaPoUfzG{O9LDHOBa+Bl#1TU2|J<%AC7 zM8a6?k6iN?5yyX>Sxz}lKqtpR*a7uU{o69)`P$5H(7_J_MclR^_bTE9EzkQ7NhUeKc@(z5kOqADWrvk?jt;kav!q zS6UAVfuFH10&l|UL&Q5=Qtt=o!t*oQ1Nb`zA0xhx%{?pV!sFu|S@2`#J4O6*YCSH; z3+S+(N}xJqim;G2pF+;M=CN2}e60TX4DrJn==~2J{GU&7axJ<*oY=qVIncph*at*? 
zdyKtIoN7Jw{(>$%d!m1VKidB~@l&aCN_#@*p6@g|i|<)_MZZn_l-n@!gK$=U)?V-8{WNpktoHKwca5kAC1i@uJ(zKhVJ+IHw8D z(6OJ1GhDqJCH(;%I7LO?uYAXE#ObBhw~_-LIPnGNTi!@Tf6Twuu{l2l9oon2+8j5W z`4E*jFIBx{|AOv(cTniAAKfD_Y&VWh@h~sDT9s_i+AMW3R)3~TNapJ7hkAKj)?_bt?{12igCw@Ph z^|+_^_%ZLG{HIT*BK`~&Z%h9}hxy81Ici!OpL%Z}hq&T-d|wj$$CJ|%f2K_w4juf3 zd=og;8)PKTR~3iLeghr+g}5A?f4;eUun}{p@YAYDu4A$e<|!;mVdt!yHkAMHT%uH#BZVU1JcgW!C%(G_E=ZiX zDz7X#(AoLx%7CK8@wch3r~bm0Jid;LQHJ=DY~pO_ zU{BQ9`EHsdFB@C)Jta0U;kN}K>SeoDJR=iYACa|hlP zY7qZ76<0{UXMS-SGen$`GhQ9yM^$-XIWIvc=cQdDo^E-u9`T;p*cCeX8T*8=Yoz&& zh_l;fUV;vst0jhG*a`D?0?Xi9H)^)+)oTW(vmnuZQ^6-OgkZ87+|D3%=bV# z9c)Xy0xF)D`90_mXHC#c}(ynzQ<|EiG;lK{WJ)z!Jk?jRKjEi-YV z{H}Qn>t66v8dD>SWcX;|v z7y1#(Sz%c};>T6*6-awR7heCyJrD5r4jM%KUCIuU4;}oKMfoda-rkoL*DP@PYpU3f z+FoxM@lUta`wKew3-LS3c`@-Q;!JF*=RgO4;odYj+fR=r&iu}Le?b?XJu$9=-(}nc z;s^HA^PzLkcN!bTI=M;ZDa7wJ!sN^G5;{3vA}&LDonAADcX+(suF%0h*uMhj*xA{{ zscN&Hf)1Q`8~GjWZ9C=@C$U<0%Ki;S zBVJmY`ay^K;od0h)IZKD;*_(=t3wA4<{5C3o?lCxCN}Xrbl}7h^V7mP8;LW@CZ2~5 z_U&SKq9nVkJ^YH1$>?GdjO8S1}Sw72Y_=$5(QHSj%eqVJSQI;1v z*^dxEz#qAD?I&KeX!?Ex9sGfFE#MUX>kx59CDU`DGdY1qU-3PO#siNLCvi@_{h;fW z$J{slHT9DJ-E$8MF~6q)f8)FKD5v+Yr-=VWov)VmgO2?jV9XNtQZL^+Lp*PFK1s$S z&>~bV?-6LM6z6JIT)#lvvFg5(oZq0s{8mpnJj;Qrr3Ux=_d73mpAL4& zdgn6nb366^f)0LfC+5k+`>zw{Lw-F6I`|#=R+J~G>uutA=g|8dI`{LBbvAANc@V`^?c~yFJI-aF9{oeS~YF0D{qf+6!r-l@s#*JHs{u$gTIj9 z1?QKNVZ^E3N$)S{;4gd!44mRM-w-FS;>h+1oqK;djU4^>zF30zABew6wNKekq2qbb zX>1qkwQGkz6Yoqtv)+;iog6puUN7puVZb-yWoxasBXsZw@)h7DC>*KSk9pWIwLX$@ z19T?GY2cn5IF9#GiPLGIzJAcLy#fuKUk9(owwT20quzy({)H|)|HcvDwJ+Q^F7Zd$ z#5vGOe?%1P=gGYj5O0P}-U&MR1LGU&J+y>3aS9L7`xQF)1Mgacv+-Lp;;dHtFS5Ns zXVwqvUWf5!JFk0Z?@L9z%oFwHf)3@%BhK4IACZHPqm$+56?#zv(+`vH5Bng!!c!u zf5%&Y&jUL63F~n9Dc9fSiLo&y~?K5tIY_WFX^%CoPPlQ_9& zm>g+0=-iK|4g=qN0KdzHn#3PdQeRH!P)_7Wz-ctRE^!(})Rz-F#4%0P0G+$os6%gt z>~i&ItmD8hai9V5C#*HwkL-ug!7n&h2TqTdjfoTQi=G1=>ihHv-$Th%wi$5_)za4& zI`@9Do(syfza{ajsP~Md-#v?Kt>?MxS7}T9umfiKw+C#b{RsP zwl?QYpo71$KMqd0Xd{U8ht2*4bm))R*9IqK(P-jK+NZZ0bm7@8#+`3;Ub1od---YA zy54Tkp}x3xi*nX|Gm$t!@AMq#z`=Y2PS)d7iE|-_XWxavdK#ST17;FuLR38mI@llU zU2uBjnM<4*iGS|C)X|LN8mLYx`BAz{?Lz;(4ie+JqS+Fn@z+SenroL4xBt8ndzQq z&1Tz(GdZrlT|);B@~kM&&>K66liB7TKXfK1z!-9!*FD=>?Iq4z75~feA3EgM*zQgP z_XAPBBDwYx_qAFt%6Sty<~a<}|)f5;wsq;tu)C7eaIkNph=hN`VcRd|O zl=bWvuasfLE1ptcF6hj1S=X~EF1#jA(=2*F$n`Al4UO>jPFFkl-#q)@A`bT3@Sga- z1@z^F4($;4f8d989X}H%ZZ4A}+aYvlheygbybydGk~mWFAM=~ZRk@|#q2qq-H2lSV@crkb5^r5Wv%HcA9gjN>19@$f@5z*y#EVx- z?;q&UPjJ2h9IrZYiF3cAo&y~?aYcOkG*bfNY^tv3KxcBkzyIj%OXG6`Z*SsssiC(M zbUx>P>~BsZpRoI%?UEC(cxgQkI%#+0YhbVD?NSpjMHGEKp@Y4U7XT+l*>uFYq2?hu z&Orxzjc?8K&)uY%h%@Dxe*S@u+o?4UJ$cBNc=w)|JQ?SD8i#tH;Q9Pj_w2-<^;KV9 z=+=MbchT^jDcCVb(_F+m6Ho6C=->~3k%#Y+KOb@CN7QqmGdWiK?Rry?I7h|apr1-vjQCw+ntVBqKo|Jb)T7>b=M?rUQLZHM^4v20A=@EzXoo$-K2?Y8Wr=fV zv)-@JaX)h!hUmA`Iy;EhL7n51^@NUj*833gvsNPB0@dzhJ$aq!FmMhY_UaPfNt}o_ z-vx#a_PQtDxxI9-262vl)7uNWz}M+kmIyQqw7Xm4y{D|b>JTroic@5}gD$-FV-ewx zPC4omzfBr_yMs>p1M?8;w!&O`$6a4ehvfQApyT$yx)jl*rxqK=V~|Nz15tLIuIx86`OWr^WG}jH_BCf zY8T?YzM${_(7_MLqk^-tSP$a-{z%V(&g29b_?{Fv-5&QQPE*w{WV{HS>pN7L&mNh0 zZ?$?y(7Mn3YED1mUP+*DzxsV<>$`XPY7HV@oAmm2C-Z3dt{&>UGTt!aw9BdIKnMHd z-57A5pBzb?b4BzV=uD3Fd~L-i(H6w z2-fof#)Pl=C|)eGVmk4L=QP`$Y%kDd9%7!e!a4(Xz&Z)`9Q5mK;>N6Ca-|)h!#cN| z8u;F{Z~U(2lu*}x63(}P|H3(+_-k6}`wMjNJK}I~Cfy1q&RBJCSdM?tp}yE30Hd|O^;)*#D~UgTte)@Ldd<48Q1|5;;@2Lfx2N1!K!1Q8rjOb{oWtsz zfwU)d@C)*s;A~32nK+fU>dONi{DS!foaqO)5$Ef9z1^U5@0S20!ljuMPYxQsi};aV zn0(oPpo@TsUHzKnw&t4$NF`#JK>UdQGy%xat{4trwWhaIv#*iXEU(e>jS zbl{-fgVW;BVd890rsqHhzhJxr=Vb74;-paXi5wrGbMF_Yk)=Ja>+TObP5hTC9+CEg zj@NY#1MhC4ysL+uBVH?;^$m1(_Ir`~5^qUn5Q#oAce!p*(fO 
zdFr(#ZV@LyjnlHfLI+NC!HNFt4sm9wbE}dA9ohxnd4!#N{P}=5chq?u*)E_9uU*^{ z@!!DSPl(^XyJ>&fE}%nqMZM8KP~PRFDr4ntNg!g zFVKM#Joy^!5AKYSgE;R~>-`L!$@v~nE$8<}A|J?2oaU)ajP1zpffpv z#yN2g>fPhA#F?Y^S7g6}&V9eKp6hRQqXO}}rPce>?p#0GFX}yJab@E5Q~5vHUZI2i zx{H0k?RBdW=W8Op{h%{B-|v}=clQcqsYRTRDnBCID|CFW-(jq8c#qCMHIG`Cc(K$t zB~!H-1LC}~=||AXeuVWncrTU)60g2m2gq_k2fJf_0B381=EON@GfqHf za;)R^;e4%$6QuI0vfX+%UR(Js(zGM~gUn|8_-}lt5hV5z9!Bp({8uUT{Q^4aAGAx@ z_4MU>G2@qw6OVJ8FcV7;!$wcry4|@ zVZHTnCUoJAPX|i#J*Llzh7rGu%{%nag-qvt*hS(Vb-!yPiPs~WSzb9FKqvirM|`(s z-@>uPi>tn$A^RP4@JBl_-W9ArfjHYdx(E3Edv;>|c)i*b;?%M^HwK+qo(~Z^m*VeF z6zMgCIJt70b}9&ieEs&n|DZEAE;R3tZZLmusozlb8=`*KtKS9ccY^vIsD3-C-$3>2 zRKI@eH>djbQNQujuc3a!)~kPC{f4UF5cRuW{Vq_y6V&fO_1j7P2C84D`t?)4In}R^ z`i-Z44fPwgPW}7pH&p$GsNePKcY*qypneCc-%jc`Q2jd9ub=wOseXObZ#?yDsNb-) z>fcwtq3U;FlfkS0|GwQzl{ho~9$;iU%IAU(|2l{KzAvWfcbSKS4*Bc9dv5Vv(q)qC zJ}&an7$^RYy@0q+ljz3@=#a<8cVZ8nd3N^hS8rNG{DAl-U-m=j&<~OK z1m}6(WyGnRNzZ}K`g*#KEF-?d(sT1#;%8C!J7s;LGt2q?oXc@O z2jTo}BXM2?n&p(^A#?@b@;L~6&l2|Am|+X?4tLPo4>~(Pyz$yWoI`!|obddx@zieO z%xwQNez3+N1xD{9{^1^azGrccH9ik&eUSKT)O|VWAG`P*?H&G!mgOk%uGrWWI`~J< zSLq*}AkHY8d>wS)ApeQ-WH=g1oY5oneufSl%wOO<9ebWQCkN;`(3zY7BaS$)y0`Zw z;`~0`>oK*L2W&NQ;{bP=};pouo-*b|<*9`8beh-M7 zPu;(j@j7(w<3wvetW)X<@q>Qqy|O?9en9z0$9O^fQ!1}4%MTs==coL)HqP!Gr~Wxv z0__+53;eXNUlD(U%{!^k*>N1Z-Vx^)oBa&tSo72027MyVt6BQ?>SFU0d9M_;zW>n{ntoyFtsz&ttF&{nHW*m|GuGk-fABHA~N*u2-X1(RS3LX4_a}nTd zz7T^rS=9Y}$$<_Wb^@U{{T8E82KsjjtX%uppSSlcOq}I$^?4fT zOpf(_MXFzm6Q{V!$H{&x*G+iG=)#c$pQq*A?eZt`cJOEahb4*MKAXOr(4oFKR|-zw zqh*QHRoyF);~jLUFXj($KF_Z}oC~TvvcAxTH%?t%caHW!$~34<{B3H#LADp@_&h_P zu|vFDR4!>X;;qhW+E11jI>ZGv)VQ(cq0h^W>4IJU$tBK%`$noo{BtGr{(%nuLH-YR zjqxaeI9YP*InbFLr;%|czvs|qX#?UURr^}fe$dJ9SmN9k$~EVFAn~xjCG7{Ddw)BO zJ7OO^;oj!NPi>Pgg%0+^dnhPp-W#on6J3onvVB7b4)VF+%stkQICanH#~0|pK^_*I z0)sjcr|f<`2Rd-hi#RN0fo{a<7pmt#XL14!_ygt18>ts@w%#x~|ILq1V{8@9Q*6H2 zm-umRn|#^spu@C`dc&S5@A#Mli5KanzTVJ*gY(%oU0LM5ywy6vy$x^y72sh_i4btmunvJ_a)KKAJDny zI}B$Me*gT%+l9ohVRKFnI@nXj(dQQ}A)*Q5NDOmIU4Bf{2TGYdg5R{`)~er7&*mxpF86>5&v8!eSM)reGxyyZvV90Mx5z3 z^EY(h;9Lke6Kd@uP70fQD$tpn0Hdt9XCD$8LY&p=9*$gJK!^3k$@a}}F0a;F#XHvT z`Nq$9fVh3Hne9yaEwA^Di!-1j$YJeYd)apHee_a7u*#)BEvgq7@2 z<r6YSJ;pffqvdBx}DU&I*_U~*)A zJslshAAH zOEj@Q+g;`(ac(r$bD%?g@jegAlQ7jk#Hm$XUtj3lxAX7!oy2>v9i0(M{+J*A>TmL8 z{0tqRg9sVKxtrQVMSuixN1I9HCH%5iA_7FWN)yaj&q%rS_c#O8i0bnp-2 zY;XpAiA|gg>K?RQw?PL^LGi9nqqXsfllH4V{(%mD#<+*_pCgcr!2&8Tg>E3JIHks)}gRN z#OuEjr>9LE03G~-eIIbf&&y4mb2jCH&g3|a!SVRM!IVb%iSw+izTH8`c_D`}UBvfK z(iA4%qbhnkL5KRmZm8d?kHv^HUA^NGp1qMz184f4lEm3-Qy%D09(G^06W?FWzjD5?dq4cFICu9;-fF}fTU*~g zpyP2V&{!byuJ2mcBHl=wI2$^|2Z0KFIbyq|{x$w|#Rupo@Kf%?b&22AX8wT={o+=s zBXqAZ-n)jxiG9qRH>4e*GmD4)3G2S>o=;7Pw@rOVMEc#c{J3@eyYJnS_=Se**I%BE ze}M+x--2C(ue2flI(6PimLEF!EtB%wkj}SjcNw?X6*qhm=P~C`ZcqF)HussKLqEm- z2Fe-Mp)+v`ch%4L(4n7V-UjDXiSER?U~~QmI`{L5(-$H}N+rzHDF6@%tT4 zW3z}KQb*`Vyt3*&eaVAPxy7|Fk9QSN|AhMn5^rupy&a*0Kc0*DA#CMP;v{IP=RgO4 zVEjXQ&a@awoP~Au?HfAx{%{xp;#^*vRAY&MMBQJP^9^)nIi1F+C!Dv-c6mH;Hgz`3 zDg6MQ^ur0U-6Q@mWJqJ4Y z6a567(}m{~XT8mP0nml#&k32I(|wQPX%`Vcna%t8(7~Up#Je>eZZ9QHDK#!he?lky ziM%T69Wr+%@&4$jw;y!y1M&ml_}5!YoD??a0ii?v28#P-Zy|(MbCi__QQAs&Yn}3h|{c~$&vPh&V73hG?K*R^;e?)*N9)zU(bim{kfz7 zufe!ip5r4%Co)vgs#o6D_?}%~nMS{D;EmOzjO20!V{q7Wg zuXyYM@r-6BPxe3P;CIY3@K@U*Plyw(lb!>e$+5nl@w~(f;$&6*Pu5TWeumRn;N<=s z`P(bvO|v;?2OZnXY4oYe`|5k|z9ZhQQF{AB$2^CD{3z`9dDSQ4<@M+svcqUB;=R?I zzY^!o5WW4OGwmN>_&tpIE^AlAKzXvRj#TQ$yysB$ohjLFp^KpI$M|II{lEJ%Xdig5 z8{AW?qY?M~d;NRi(1A1I8;^Uls>LEskDGc9bZBQ;ML&xDA}(=Gy)`+qok1u4D8F|- zty==(dBxM)A3D@ewud3fy@``fjeD{`LT7RUjnm@%YJvI5h|{i$SspolLKoiniE$SG 
z{F*rx@t611^PzLk4=~b-c%#jAAL3V4=f`C`gbwT7VL5ly+q@0$U^@)FTY_>AnVEsO zCD!Qu1ReZ@^Aq4qDwvr#ced&|(7{iz2RJt`XC=Or*W_cwK??W-qVPO4_`d3?0fOz{h=kVdt@k%{;q{y5;(MlJW>zHrpx7o~_8aJO z#-IAXaXantB3nj1{P(+S*l$O;_%Q8j?6jiTNBeBroj9gs}y zuQy0jo%ned>c@5H&<;L}d9d7?TEvMFqUS(|c7X3#qrLJn$x-}?GO=iX1&a{$h( z4TwK7qP`tqyl~GCFd~cZ#4gwqNc^Fx_3Z#U#Gl8q4)6M+^(Ga6TJw>IS~n+d_r&^g z%Y0-b)ei2>eRx0c#SB+Fzkl33 z-z#_NG^c(4y_Y4&hqSXh5_e}Fef^B3^C{&><@egFx^F3`B&-^)la=z<9{4T}x zc7g5}<+tV~5`LM|ey?g54x^%2N6cE%hqza(nq28O=+J-hzBl}Ka72INByFPSKnKnY zaZWdF$-%^F8ldMuhj9RYLwR<-A4Z(oHu*B>-22=5J++1#M-l&+P2SS(dur$hDDSq8 z=2)gk6(cuO6kCHp)5P#b5`tm|2;~U(+L3tw%SU|i9HtRCz;19$p;EeQJ zOq_*k-6{JQbnwUSsl0y8A89#pnyEaV^apfi{jk60G_D-t?-^!%u!?w<)qOk3gHH0| zH)i`+UbBvPYi#TfUBr$jQtxWJ#dVJu@dxadwDTXtdp}vE=-^+R4*;h{`klo2WuaM4$$<_Wyw3+tji-BvQ`_bqC3I*f z@s=N;dq2@O{7IaW)AaoSI?hi!jRDoU-JR)qh=y}k|euZ-`sAr`T$A~w;#$M3D zUN~<6PLyb;h?8!q-tN$u9P2)FztA(pS+mmQ$Z^TDdBVES++fcI;@4lR=X@;FU z=YCgs%w^*5p0D>0bkaY_=fbX;YFsDY8XNyW2mj!E+29B|EhICyUa<%#++E^(Hr{GF^9bS5Xj7%A>$XWy5AIMdWP zBJ&y0VI4QB)6656Mjv;rSK0}?FCFDg+^{A3dP0YKq8+22Ppc;_^bacr3Aq$5Oq{NW8r^Jm{{~NL=~a z)6K4UtWgt=19tEACEjG4ehD4ovD#|F39RD%AWQavuKWScA;TZnhGr-J%;|c+LI;1~ zTpBo4Lh}-*k2*Id{R$oY@iBlG^=$v_U1S{f6F$+ zb8gqy8#>qz;|%QAIYxWp#I)Ief)1RGB46&msuOYgpVpTLI`{$W43y_}wQj_D_)zZ$ z=-iJ>fyQyMpBd+CPvY8?~l-#9BaNHd7*j48TwV< zUp(!Ph$m3a&R)U9Ppi)ROM61+zCSvRvf_Nk!DmZ|@1wpOB>fK^#~}`*q=@(1?p#5< z$Px7Y8#?$0{S@^c+kOpkhT43O2Rir%b^xbq#tp>zoZx5fqgeA1ojz_NeoY@e-_v}= zxkx;ZWxu|S_(Rn@OVU5kN&ny-QrII|>D|N&wt2rEI`{|UA2@?Ph7hMgsJ{O|2Tqtc z$6PVw0C9@l&~u=JpE2H{JnefQAx@J!`hEzV`+j1L>$X-uLHq|ckA$1hVcxX(1+)UbMvmA1D&1SB2KwToIWpq#%|XBk)zaA;ve~`cS@}HLK4Kk zN&MTYJ67_fGF0=)l1{u;45?`ItDN zkMtbqOwRZ7nd1GpHv^v&XHS^Pk#>U497nJp6<~xN=5x1?^1LG65Osf9#yij<-btAB zbDv=y=cv4e)3_kM-_`QbTjH)#->a78gielk$Roji9T$8gURL!Uh>RDYgC8*df%9_c zKg5Zmz8fJq(3u=-e~5Y}LYW`)kVn(~%zRt=0Ng`q{Jj&g$prSJD#yoz4EgXYI&gTo&&S zT|bqP_`x>kZ=r*KkQah~rVjTd&gM*dKSKu&;zn@J<<3r=YU;d^?BCGA&q>63D(dZ= z#CcIxKh8qO>nVqUJUz-)a&lhc9jv3TA9QB90*of&oZ3|Xg2Wl5?s>`a8aj;E#r7Sa zJ>~g!mEU(7_r!P|T(Br{y>0GgLMO*-*muPaTQKW*}G z(3u=-KIG+Z<%x5&uUQ`1&pgYA1RC2#zQr%AKk=&!*YlxsAGbPtMaSGa; zFM|#o#FHpb(Ist&vsAS&Ij%tm4&LVkXK|bM#QCVs3rP-iCdWFjyvo&?IK^%Dr97Qi zeBSUou-7AWC;sG_diz6%`X&_nGYeYwCXSEIc_Zk+nJWCaJAXgo3<{{y}~boTiay5a)Dzy?>xHIZor&Cttdca_Z1*;xwpia^$!V9ox@gq!aJIcMO_O zyx}$V<$}&ESAbDd#33>BE+S5orY1+mArp?Izwb z)t{w5po2fqZ@`(*ejjnBDLYCIbm(t*M;x3{xegNNuFW}4=-3|)BUV~|-{Iu@BgAWC zb1nip)UU2sXNB%PL7e?I=boSg2XO`JS19fo;*43O@Bh$&BisLh&*zEL$tKQ%&g59z zYt3z!i1UlO_bvOoXK{peeciXyHR5+r>k!HJY<+G0e%6;Pw}>CFtA2d8`+gSgO~OA@ zAKf9|AT?jgb_^Z-gZw-=vsXMI&S{%-hR}f%r6%78U-a7(;xwM1Z!ge+6SkN0j3e^C zAkMYvdJc3ZC%~93&bO`jVh&J`sQJKz;q8L;b&Q;&tGQ-@g*4q8cw{{h>qs z_b)wIXl3Sq_ou7)yrT1T-n}E1{V|{UyYjQFKXmTbuhxDTcT7~`CsXr=N@!ig+E= zdnnSso~xl- zp1a>)H!ty*bvOC4o!Xtd$NdwOck_UP#4FHA?;q&kA1|>VxU_0f;#?Hk%M&M$OE`Wj6B%bSB4X zOcduvUax6HoY95!_J{7&%Sc}7PX-r9x$v$U>=wUKQ{ok{=?Bn(1Al-sG=59se74DB zL1%ITjeDut{(GLaAx`|(X8q)N2VHpM5#EbMIS;LCPyC8a_4b2~_i+M@S|>P;{iAbd z;$fVZ?FBmYv%X(K3XMxM#1((yJDw=#hwR;n+eP&=X$Rw};q z%ZRg3)1=r)91~; zpW7GfL*IEviRZA1l6R?ewmSIef4bRR=UxHRuB zaeAwL70H1PoWvr(5;!a|an4NF`xQEHatKaIpJc>|W^?}%I+GJ%47twh#{^GP5~t2Z zvpllDLWlJu+ui!k+@z^K#65r6ROemwzx1!XBd{6(D{qb$?0P z6*~9_=ZwHv{=6`8hW)C)Hvyf=vF_^*-B6r3=YHxwm33ZCm83NBzuA1Z-m`Hw&_G@V zc4&C69Pzhb(f23lFmL5k{yKIk>8|$G7P$O{cmw=_^C}X*&pmyAf)4(|Ivt$QidBeH z{-K@&ot?ibyst)_JD>IS^=w~2(;k-BBK}CPpSdsLFdB*b$esGvCH^Fv_79z%J3Pucyk>px_}%DE#CvV?J!jrma~N1Jqg)?{bR*86 zHusI8gFn#T!Ff@+7jde6{F#2@G$xPnrv0V3e*_W#o$80uAJFmnqX1)(_^!g_fdh$m zVw+iB8E-&`c*FZd$?DtF<2-}Y$SC%O+GiR{Ttme}vYgOKKjS@8*!9rS5yZO|svpOo 
zL;uD5KHyyLG=?~BZ0^}XXXoeJsm2q>u!#rIKHT@aK%=cVZ!!7oB;war?WzoN4vrD|Fz* zERuoxN%i{+i8I~iJLAy7&&c?@S|?pPW*3K^?c~u^PNUwvEN$d^KRllId6_bvfn_*{l;lvos9Ap z9Jr5ochvb$$%Br00Y+?*=NxkQAn`UI){p1VA)lSLuHTcI@eiwZ^nHIsjOVdC93}2+ zb^c416FM%ZHGb~aDwKEu%gl1hb}!@S00oX67P|3CRJ(^8zol+-o%pABneT?l^%QjAAf5x~V#3?RDLqurfzISOjcKd- zU9=bt?-FP9RFfn916|Jg?N^ovG}L)5>wUxv;~x-j-`uFNkLf^h`vUf6AJgXhFeI$ZBx=&=5BDF5yY^XcugV2o?rMBWbk9XnnU zf5jq`FZ~N0{EK-BocXEV5@*eAJqJ3}7vmE+Hw%9x&dCS*`aFod=?1_8ZutSoGk%+S;yFLzt4jjbo;QWy!8gcS8)^ngU zIZh*965a=hxjiOvs*Nx?vfn`$KbhG8;b+7RC|8zVafvr(fqq2_=)lt-;|j6rw{7+(4k*CRlh9LuKN6P1Dd$TUBnaMPwkwH_!DoLeAzFdga5HE z04GMuRK(f!SkHkD{=#?N!6|jrhd6C*-cf@tJb&Tufq$=02I4QVdDjR!_k8QU?o2x} z6TfGAGhUMI*X~|7zE^7gwDPF97dKq-{@TY%`63ppTTBb3LVPncc0(G@IF_BI886;%L!e)&5JI68(1_M5>;e6<{L z4%oa82p#N(aS5E8n<^4#t9l<$#*xsu_pj4PeS*(%ZSGiw_&3$NGO~X_H(~Eg`feZW zfbzc1T%CAJ)cPbm`;8UnLX3O0h*Peh*>5EWI@k~S0+i>&qPoOsQ(C`Xf-XG!A@2l! zSo4O&|4>OkzCg$MD(icni!wJMUWZ!xddv4dVF#4!ew-G>Y1cr{fe!6h`g7#t*2EcI zP0xW2en5LfdB&}6N1V+5Cg;DkV{3jr{{BwH&!@^M`JRn~)^q(Km%9nD^E`BR{<*e)AaU}a(vQ#ZtNVW9FwTqb zD{UJ-l=xp&UP1O}=uqEyBF-+Gb0l#hU(?qYI&jcFVK=WUV~CSQy%#6T106X1#W;0f z#CYNqiK_P_bl~I=^X%NhlZlf*nVtik$+4a%zw>e$ahmz+`?u z$H;yFoeFzh>+m7Me#4T?CtmLO`g%hL`{BMA{8{{DFmcAe)wc`iz)3C6In)eVN}R7Y z`z_F!9P2*D$X{0yCx^FLFKH+Jyx=sRe&+b;Qh~L^OKmexLnrNy_6K`Sjl7X~H*DS| zg%0+@d;!jot(%E+K%GO7{Rld6DhRum8nc}^r)=Kygbo}dgwL0hD7Bk78ExV&=uA$4 z@l@n>Zp7b5oX=`MRqh)>hd4D(K)wB^Pn=VEmp}vOx?r!A^+JieLgoMEJOv%rkGN+$ zy3?7pMatkED~Ew|2;kRDeV+K;=IQeg(7}%FM8C{2{33A@1?xG`nH=l9JoNDu;yAD9 z^Ddsv%T|8D`ZtKbEU|vx@{I2=@O}X7`Rmo|yPKdwLrdt%-IC$z;q;$*TpmkAv> zAB0~P=6Xb&YU=*C>~GM4gLgGio`i{?5ocm8eZ8Oq2k)YQ(|$o1amuN4$Fe-onH=l6 zutmOah%-vv|B?MIymMhI#d|qZPP`{>{~7xB0Ui1q-piT4e0jbSahALCsA%t~f7xE2 ziT`@N-j2|r{uRWz-55!~5ogH(lOyX79mbnYTS9NXEA{WacC>Sp@8PwGjvw=qIdAIg z51sqC&YD;4cRVWb8>w?Vay;`aZnTb>s8Mj{ITJhaE2eAr6Lp zUKWi@`~fz380gRsx{H2!>rs5-?0TVZ2hf?E@B17XK2g49-Sou7iK*fb88<-3=UW2} ze8&>?yIdg|@!qI?Yv~{8!t?(O(Z6~`NJaeqne_1{bm(8HReH@@xR7}rd z|5*D^yJ1C$(>uS(@vQ$?&xaK#U4nQYY|eA(&xbjT6XIRq-+W6GFP%DXA?HizV6Ulb zIB$3HVL9U5x3N2P;9#E>{`z8cb!tE1#z~he4^}E&<)&`Ie)gU|L(uz z+$rk!AblI=IrZg&4)w!&6r8sY+7Tzrq31vc4%#y~TUT@<&N`cOM9_h={^Xxzr+V$X z5oeoCJAw`z*dOJIUB4G`_S&=~=uD0^e)>CI5OJp3^cxvJVSe^5(EUQCh?`yWGxi-( z&Vs2162DSSz5Ss>eGyNAGuUe=aV{0tbD%?g5m$nf_sR(3^i}WM%W(oa_v4V$7%SdW zoil9=@#m^>SM~$wIN$Fy%2nlY{C$n_#2Zz_theMrm#R;^!Oiy_*b6(N|D*m#GE64k zCp8Ys@dr9^;2&`6C!S86(>5IF;EzCY&hzVEvxrk6pIJZY59r+c!(rh4W0Z5olzGIj zZ4);@hjQZF6*yP=1QREp%{fKr5XUA|0aEv97ZyCsam>|E5a)w`(tj!OpV-W^(7`YG zehN5WVy-04DjN=TsIQz)3q)K?oJ3{xet|ALzoZoT>|@zB5`U&md;*ZsL zc*1cSe$#jysMB#DaT5Pya^yGwo!RX0e!=&5b4&5}jAvFpNW7CV^y3P2%nL9+h;#H~ z^Bg7K7L_lSet-^sXftoPlIr-6pI zcyA}@&}-sX+hq0=IgdgIzvNJUDI06sqH$jT?i*m9fjxV+d{6vb>rB4119Y$_)-m9u zj{2E6t&ZwB(7~RF$G{0(^_4i$@9OIdU3m7yJO=)<+7T=Km=|4SbKeg-_k4%(RIKw4 zM~h1QmtN-hEA0s#>={wq@9%#*265us+!uik9QX*EH`{NgnH#rJa~#7#{6LN@su=s3Q%o~N0AA}R3>mekiAsNJ*SCrS%-> z;D@myPkgtn4{>tY#H-MO6FiyM<14FWAkILW_#8U;6X%>zuPO;M6X$s$y+5II-w&Kd za&f+F%h{~N|E%mU;~MC~HuCv0%+n}ulFd1YS4Ev8lKl!g*bnU*oI5@95a*>$yMWH* zI1PM18=MIR3lJx;f@wclKj=7L?=Z57`DjJ@BE*YOUEj{2L%C9mdpslF6emt@72nHp zLC0~2!@#-@_4K_|ig?i#Pud+i*nO&K|Ic=mBThV<^$T<+$2#90>s^sJqt*IC&WE1P zw}_We-zNF15I?mlr{qHyUcZKY>pA0sBs{-?%t!h_~byy&s^1AJE>xiF~9TaZcpXbD)DCu#X2$&0(F0 zbFzfVk?j~d_wCrazc@dz8}Tbv{29JAFTW*yFXGR%8Fyq}9_Zo zn9sq9adH50(%IxYp@Ux#M}o6w;t=9IEU&j4bm930;}G~~YK$O$6*V46zeDGq?=b3# zbC{9xj3Iv3+WPhh9qhSV>|^<)8&911Hscm_;P{I;xmu#h#F=N4KZefaIE}#~zq=*b zbmF{G`C~aAK*#Z?^?bzbu5*c3+UB0Sr|~l81Nhmu{X*i$SMN#7@ zt~`v)TWr88A7aiG>8TU`5c7(YZS_Yh1y1%wgBM8PX8{eSUo$0Ui8|c?+E7Q_~TrUuQiBIy*n- 
zshWv6UDW#v|JBcbi1Fsj6<^|?SgS85bf_=ZQK)a3(bN(KCe#lpV)Ba)==5#kX(tgmnx1ZHNzf7%8{Jfp?d{6y@ zd!i`+tZKE1f48^Em;QlH`Um-L@Y=+yN4#3G*0gy=Dub=Nlb2h-o!L5FiEQ8s5Ob~SS85Y#)07!Mmg8bRDa zyY=OSPWCsviv)i}?m3Qlr)ulxH|XHkc4B|z-Rgt2*O#A}?|w3qa&`}e z4(2s*9?soHoK6Mx9O%Hoy+v?#blydruQu;#z+%FoQB!-;}3LloIE1N z$)YFr6K{&jx61wm9mdIW#X6M<@*D12m*L$i)OXIj!^G{bxU!u+z%$By47PI493$~Y7{)E9XtaQckCN1WY5%<{O`|Va$bdw=T+=qnw%MlS4N%Nl=gy-<9BPmc1r?Z z;w`ti*CX?_)s-X49ITwLZ1A71`3L*JDDT|RY{Z|IO>al&%=R5%;C%#e5{}JDocZcJ zsce7HAucLAJn^S(v+dtiSW+P`alO=cBV@nvEPl4eAwDq+62G~6S5ERhi$lKO4-xO< zA9z`W_&(~LIoTedll=$t1?(JScM0MpQ|BPv&od4K_b0&VF{Lzdj;MM|4s;mTaDNP( z<$cQ&C!H#fv>$Zt$2F^enilsbeob{wPVznVkG~k-Vs5KS{3AB!1EG`t`9*w><>Zta z#QUu7*+{!Whj|BfhF!assY9GOFZJ^dbnp-Q0XR`#*CS3__0EN~A9U{h^Zh&Z{rDU} z=OvAZe_y?OA=@`}a=gR23Y52e!=}XZx4Cx%9pY__cXRyPRX_KBi)*~Ycf!DrUbrRk z?auo_2mc_S1Wule9f;FVonMlEh7SHgoCVH(uP(&tp4RlU^bd6I{bP;)ZZ7Fg{Ei8K zhVL+jC*b|b#|3*6KV5EpJB1GR94q3rF=vB_^LKSU2Rd-DUkrPW@ft*&B(3xu=)l4E z^uQ^8eJF9R+w3<%XL6jzf+fo-pXWP#Byq|XFv}zR2XyB6g1m2lQB}N`c01o#;{EM! z^5i@V9p>3qEs|6pdntp;(*_y~#QQQnN5&I3tbxgu?GQTjyQ*qH?B5~V%M&O6aOEH1 zC)l%ljmgAsY4iP7=-?OZ6N8ib!8GC&S!|Y5_B-g{mo4J^N41vDBF->%k4*XnI(hyO z>tB@XM#Xu=TX9NnKj_@|W2f;}#1T1OEhPTL`zBwO7dkml$vDF6&=TUsR`+0~9if9i zuynU^=pQrQu5OLor!|V;jk91jYKj_%s)_tlk zWi}J9pUwR{xle`p0QRf&WE*kn-7w1~?FSwFfIJ;I3qI^3&OaW#%WXZcH1=!=aWel@ zJGSP-{~3OO`1O_DN~`;JkXgOr%`7ee}`>+sZYeotllS-^B;5zUgW2Dky?xMdF7LQC0=ct z^8wJAO1&SU0|)C$a3VE|L!4b}^c?8G!M#{;l4XfcoIEypS?ElT z_5JNVZxaz`n6kSZ4?TT<8~GQM^W(#$#E-wltgmcu(8+iU-zNkw)|QmS``adNg%0+^ zyZ}z`{yxN+w^Z*3=wLs@P2emmpMf}MRlAV(gUBy51fxf{Xeb9LA;vkod?;!po2d!--GjVdLH6TQ|&}@po2d!kAicq zZvo;IQ+}2HfX=-?tnqb$vPFnLT)k&3`5yaQ+_N1Ry9Dv`seK#SU+m7+Vf=?ZMx8H3 zyhuy*?HoGz2jdtx@8*;vPRkW~4s<5R`W^VgEh`ddo*MsT{p`L2kGKrwDp{in@lvXB zQ2GNp_x`rV!R>si6F>VReZ6HIjD1d&b9;~4#Cd8nZb1h>;CmY2JZ@W`IEmWm%LARs z2{1;V<#TTVfsKjt?7exOM$Wg;VZLoutW=yCq4>_@_wQGUcNgoGZ${h@bxue67rMaL z>3;a`p|!tN+S!tL{#W(qemw1OcVlwiZA7-V#Q#0EX-`>R=;ZtaJHtO+mbWL~8b43` zV?D3DwQXnOMCz#L@VFXi;JafePm}`PiIYWfWdDH9eg6nF=B0T>dAU+YdJ(^MXS1A= z51o6ybzl5_-5}!sK3VS{yM1xQb+AXxHv@?Gw@thX9r_`@!vRj?F++*d@tod2&|y4` zE8bB%*mNXuZuHaF4?45`A+PB)4vTYI3yiVEYu!iBgHG~(ZOHxO-JbEpn=n<+gYHnV z>PJgH_Hv!isv`2EH3KITFY0`gC)*2j$dA@g9q-Apft8}nTItGN(JXQ%1axQa@xKagK-RInaT#PQ0@|WL+?Emh97WphJ7Z zJ9Mbm_JK=@Q^h9!gHDcj*uMsEV%L?#i>!Fk-q4|bI6no>tP*R9^Va6P3UnsNns>X9 zX(MqesCiblFVFJF*8I-r7n_OyWrpbo$@eV2bQ;@5{C9cFcH-|-@ui%Ppp*XTF8V{; z{=12HY@xp0KnMR|{DXf&%k3l1M4R;zbnp-C08Xv=2Z9^TbJF6Q4q7 z=a(~?Fa1B#&N@npWa;9C!wl~3?!LH7>tGAPWe&A;WJmhu6p;oVnx7=p`F?1WcnDYy)CrZ^{I(yg0a%#R1 za=)I~SCTm8d*Tmit1mBfG9F@G3x6cL_KA3-dzw6HN9f=W1~p|+yCB?M>+p06`uG@Z02v!$$cf*j}2ZqUmxPN z2+)@oI?Uh7D#x5}J}E=whFjh9w?MJ)=i~7x#Gg4vZ&&ExpM4v+UFIDZgE-|U>p9TD zKbRMwzL#spAx`#L`ualWdAxEOorHhRBu+s5OY=>>j9bu2|KMI*l=s5D#Kb$L^4+pM zK?ncf{`_7AyObMvewcgT56``=IG1hw1D$98Sl7>$nr9~dPPIQ?+S9xBv(xw{=6k0yWhZ`3)!wE5p_B0q^J>^* z{fk`08>-gJ(yq|KKY?Q2ICj!+#2GP7?`P=XpJz!qKkzPVLE`*jlb?g`SN?$?QD46o zMTno~q`rSb=b7&`W(xm2nNouIA8o#uLC5~FzVBW)R$1cx_Eg{B?cR64D*SXfeFfq@ zvzgyR2Y+L}0Xv3zU70um1N8oe&g6u=`#Fa9F}B%MjX3SpxG38*baQ94_-DLA`#~NL z;%&A$KLj28f$vqQ zclc0Eh_l>gpBHrhPigi2*J)f4`@Jso z=|tQp&Gi0-PWl)5RoL@M{%*u;sqR&f@fAAw7x!|2voFOT#Ob2)bdm#|UH{m1s}FHr zv^VP~$0zUlhc*Acc=iC|ckc0D_||hV3mOh4{>*`T|Ja?2!M+mMHD~hS#LKVZg|sVl z@DIi@aC~l$B+k?EX1OE>Iy?WoO+Ah{W7IhMZ~cRD66L&7ZW8g^x6u0sx?jb|{o+2h zE3c;zzd#Yaf1u<3W4({9@A?_Ui&;%yUb%i~qQH|rC&?eyxzl!j+M>`L;^$KL#z?>M zywYKypTVvLj?5=cZ`Cd(2RhULF5_lZzK5X5gvG?kqVi^lzldwl>Ccl{*Nq)q%ZXd+ zKjqP!h9TD77h|s`{vS5^6zJIR4g>uH_B^<09r5CA_ohENj51<<=;{?joC`MJU7$n1 zTr18I?n=9lI2)SlzbiuLx!+jhW~M#|iQlw`p6^{;b{N&feYU$kA0dA9?IvHgPv{Wm 
zkgtMYqPP5uIGgV3InaTF_vye{cj**y{z|0xFLWlyX)F-m!$#CTOPmr_^zjBdv)$o) znAH!tVq7HN)IR!nfV-g$msuem=GDLXIEMVLe|_ zb;wd(E=*loaHBilg7X^S*ZXpt_@!;;A<)79SU-Z3ar=GZ>>Q-`KXfL?8rM!#c|@EJ zqxAOlE+6J++!X84Eq^~Hep8z~G<2T(y~F4jo4=P99r2R*X^-md03GZp3zZ;4a> zuAT!OIGESKFaF;@5GSL}xdQ0G!S_gTny>vrocLe#<$(_VMZW^4V`HQ8&-u=xQS|$- zq5GBnvHuqQ_f^6Yze!U4{%Gh>U#xe)sh%|=aiZJokA@B$d=CaE(H&pnT#ukH4|L#Q zUIEURbqR@cQ@zV2<2`gH$7#e6_h;>Rm4Y~@1I&0Y*XPi2e03VnXYjf2@k7%Puf!_- zJxkEZbKmio^1Thlk@UnXdsNSZj^`cL_#Cf#4&t?mqaTO8i_g||@T#}DiSJCTFRy+b zY<)kVXqJ4$i=N%&Nk4P@^)ryqMSDrxydZIE^)oqg+<*?_Mtx<_eg!6OXJL42VATdFGBqz#ZO?+xPuV4&wf`OV5RF?)Q&t6Xji1^yciJ^IA@0rFh39{IZI~ zEu!|d$$khOw{wTlP2>T0CvXza=a}A((7_*g=N$g1Go(6kx_;Ahpffp6W0IK19sW{_ zI0y5Xk1c}<>d&)(&E9mc$}oVV;#q#^NR+3eec4)w-98Q5>u zfyTrMRQoe!y`htS#rzn&b2Xb2@3>7K0y_8=^8#=(C-|K>cWk~xLuYcV^Rp=1+Y)ET z4!s||ou6TxM>)&a?@0U^=k$E&elJHXS=zK`VW2H{d_OZ-3x<= zpF`cBBI7G`>RKE$Zv*wmz8KhX=EI@HjcoI-8Fb)aJpxXnDGWAnZebatGnTgDJ) zn9aP8Io5S&uXYoN(^0+CDgEJHJGS=2)+r_v|C-G?M(_He!{{mQBZ&2G8u15Z(6?{s zV9(K_eGd(qMVuBk?@K`k4(1K;?}r`ph?7FS&n5dUbl~9p6FA#?EF#V=n|qU>GdWiK zH_5t;IEj**_L2Ve*8Z5!p`3Xit|Wfita|%Hhx(%a;FMUjmN)}!-milWoCxB()|RFl zi4)W2{YL0ux3zwp4^5h8D{&s$yz>N|=Qw4ZC%pb`7xB~8*89b~d4lzQ@4+kf6Tgod zpJW`A?|VmzRUqDmrB3yo$~u3`oaYE}w_n%KyX5>0`4srO>6YWf=|5i2fzI@k%ZRg) z_s_(Ldx|(IRen*{6S`(9(l`5k^0{`0f80Zb_=fUL?{|i{RX&*Ill=y|U-|#g;S=~JISI&n7I%r~J!zr?-r;ABg3 zn>drzem+?q=)ggq2AsOf?-8e!P5ur#^l$hHob5&bCQd$^{YTJww!hPu{h0G>Q?ETC z{;)%)f93odI!r)NZ?t=qcgCO>#LIn0KOck+9OP@jc~UcrxgdqKxMhw-%o+du4x@5Ec7<~6dtL5F(c zUUJy0RFN=Me$E3%RQDLjdO|1th<*WHsCVIs*CnHVyoAmym-YU=C2M_%^ZYlHBjcXD ze{a^OLAzT8{fK+m4~FucYaf-kr__7QGVVe5tA4Ro?8g{hJ|^)Ws_|Lc6FT0HVf{{f zDOX(LMJc8qN9A|g!pc#MG;%wfl=Rjw2T*fys&&rg!1aWq$ z{gE=>K!kl35 zc%}ls4|kwZCE|1s)^nf(2lFe`_kH`S#Myd8&w_Gf5Hup9`2Y(?S3%?iH-I+KOZRR=9fiqXU%Xq$bcjEZw(~q0b znH;NMLbvQioK80L8t?q#G}em!*99{4BmNSVca-BDbjls>d?WG^uwUB;1Bv&$&3p0C z!G2f8`H~llhY+WlnlH$CFm!hI3-x;daRQzCd6swfa~RRY`Qw&Bqly2;=Dlj@P+!DJ z*e(36@x)0NUEf}y0|)n6fK&58AaVAn``e}6paZA8$SbB=IgL2uuj}I|bSB4XBoEK$ zqYBlUMV!(qj>`509k(xQUU1~Mxx|ZNb3gsR%?sXIv5@$Gf7RDp`eB+FhZZziN}Pl- z^!|hne!#ga_%l$7%2OL%59S4>Hhs!+w2^62Hm?GruS2v(TmYUvS}jf5X7M)+~3(yK3UT z-j{Js5ck?iJr_FgVyyoo*Rs&Z)jHHV-^;W*n7CP==<|_s-GcXmuGJniD9y3M?sYD{ z3&C$UH=iSZr$~CgK?nb1z6DODdY6b(?73M^+0USZ|1oZWQ#R%`;+#|OTuJ{!=XqV= zXJB7B_-8lWB>v%;W_=|eI?sITIq%X9?h-#oo7Ed zjK3ms9xz7q_r$-g_SMSzLWla|dpPXY`_w1m9PXsAFLdDGo-uGfjQ&QPxV`io=uD3F ze0-rwp`AbHC7;-wU(=tDcN%GP2GIA2F`2^?@9I#qT(Z6Xtoz@$bNChW1l04~z-Ywt z?{D&C|ANl5U!BG-G2bo}IzI6S)ivXcEH89CAF{qfdzwT!sUjlzrh?16gpAVQk=@00B<&QBU z-#4gP2I3!zuJ;FYobR)qXKi;PGw~KW^*sG~R)>M{1@*sQHaqe94fSSRvii5uhg`(@ zyg|>A{@p(ND%~SofB0|2`La)6Kj`@WxsdnoM4oF!qJqS0q3*MjaSl4O-@1&5;=My- zYZ2m9RpYLl7eR;lOQ}Vb=g%2EvKZn4-s^z<<18*g+~McU`pWhP9rk(FRvkCTsuYVJ z`2Dz#0qb7yD-{i?*P9Yr5cy_*kN?{nG^x+d`}N7I+r?mnlsVxMP;r7q&NPh;|AdxQ@D zz`ghI$G~d!iL;}io&z1)AI24Miihzd&H{B_N47ucJjXwm5gcJUjh|V!HYI*)KeL>& z-#~Zj@_2J!uWH|DM?=K9#&(@r61TrPH!H^h=;Szu?})I&oSJQjH_c}L0Ui8;^%^+- zaXJv^j?MXE=up2^nRy-D@N{S5v{vWPW&NNVpU>Q}2S4NcJj&I4Y~Q}ZI}XXrfp z+0VeaI`Au3?M3`8HuvB_=b3MP_hVq3e#EaBsPBh%?|xw1L3z`>8%VtC>RhkvhtR>_ zSm%K=X6F#%Y#y)ghtR>_cuxhK3(EqC^Ge-!BmE7XyeE2RE8h41ro(9BtykyTBo8`1 z*J0f^v$x)O;>A|?1xS0zycOo_sAt5+fy9|*b58|yuseQ(QzFGQ;-qh>w>xy`7s!i% z)AY(r;*9QKa%8`N&U3r9=4*CLnM?dkHs4jf<2#M2zPyh6ZQw%UFC3!p7tqN#i2F@p zkLo3t63?mPj`TBh@DI+Hg0m{jO5$v`@iTPbL=p41E(g~Tr(A&E&(N8ikbOB~-SA}d z2IAx&r?(Syrk${Ea2c7!d@yXkEyO#n>L&F8WWH@at6_>D;zu2!wpq)Gk@gX%uG()V>o51&gxmWy(!HKP zzI$W5LAzOV^dNEfbvMf=>kr+p`Xl}w_>O?1#GkD0W%;*!yh9HDuyQAee^cGpBKgo^ zK3_=rZ|1sAa~r%L>HhwV`3CqWeNGd^pIeIL&PCBZSW6Sog_i 
z4ZK90y|e#|znsR$K76icWc_Qz52x;fmE#z6yk2k^FXC{#JX7u#@s2Fjw}0qhKg226 zFG1{k#2IX}p9MN_Fs}!v&gF;1S+DLtlm3Jbe!zSjoKo|j5GTR{y&s_S+;5!5@oMSm zyMS+{*Tf&B_JvA6KzH-;@SomO_A?%dbKHB+y(iwNVEwumI?v_xGa`$7s@E_7O#Dov z^nB>h4~wdP_^#rb;7IGnyW=Rp9Sw96tl!S-`36mz6kYAL#1~o#*)FG}4HB`}^h0N&JRtpMdl`bW=mu=ydC+ zd;4K$lsDi`9^#Ew_dm#a33Tue-V=_U-{)|xihJDt!MzFKZ(Ng~_)VhfzqdnY+SR(B za&xo7#Mz+ssmS&z_fs|t_8k*<;E#DF_Nk$K?-CRz?)+qC`DFc}^XxC{IAza4e~l`;yPVIRP44jw;|c6Dtx84WpRhR}1|9r| z_hrDTnafF>cvVb0OaDQK_%bniaLP-C*Qxep-IsIzZFS;CR^^lR_ilb;y;r8wmfFN$ zuHJ!{eDCg+vEGB7qjx>x@2_FnMfPXAd$94oAnd%qSR>}u)Au*%?BZd}2u+Ezvay~c z;~~al!=|<-{;f9pazcl=jeC_)&Spc~6K78c zeL0~+9IUJQ(VD91S45n+&E1c1?iKtDzjr2n7e9SDp@Uzhg>iy0lc7jX_&H94}~K_}-A{)gEQ0l|HV7pTr>%Ki-<`giL=nYJ~VGtNDK!28*- zTk|OciF>7i-Y?L>FO8I6B5g@_XL8FRw_lK72S0U#A;e$URBun{V9#SBk5n>E0C9%> zuD2(2Y)_~0Oq>^c7=1MH61CU!pkrRheyCc!zFqTi9PwT>(%TWbv)xQR{M3DzPtIu% z!z({I47@u5J5G2xiTI_e>+J{~+BfnM;2bzLl{n#w>N(KCA0>o8%8i>zoLcIfWNys_%slw1cuXO_!l+!pVlR*18ZIGxIy^^^SsI@mvbz}Lw4;udz> zAM;t1ukYI>#66?NS6M#jV1M`noUm6`5NE8-{%7dGi74{3H)gIO&McerThM_MR&W;f z-awpqHuDeYOpeo7A-wvRZcZ1zt>2hIR7&h)Qwh&X$$>dOP2$+6DQzWE;` z&T&;9IsSV$KXVw_MEhu1^Ca=#sPj;=-9v|R%KY!`dS{5!PH|*8q4PXX2|4c|?jxU; z-~#buPciK&+Y5BuUYy1ZasTzDpv%Myt?q}HJm{F`Fk*>&AogYbn|PhJ==}{H+703f z+TZO$Pl+?2w0>U+bSB4XR2!d;@`^{_z97!QF#5a|bi1egd@q+ZA3Zkr8{)l*s?Ym* z>)(v+_`B-6ZtsbIuz|k3(D8Rwr?FV<*Yo}KiFkjm)bpT|{yinmDWz}njd)wmnmpN0 zpo4$$og4m*9vrIL&w0sSYF~}yKnISoo9BxQDuyGDQBvRDpaZ9~=>O?HMkLNCn|-Fx znH+0gGx3PX#PJ_&mPht~@75`P2EOCKZfR3SCw_dDCzpKaFpn*w9J_YlpwPQ|EOyUh zKZrbKoZYdA|M-C3p3uRb_zsS8W@#ReIDg#IbD)Dg(Qm-Xn>!J4Qhn6h6FSfF)Y_k) z-%U#VSO1A`J)bsia!TT7_0^B>cIVSDKZQNER7gv_@QJ;d*E$S*Zw1FMT}I+uwmByV zo#`K^f&D<>#EhJkIDYDUj~qXs%c1@c@UTxBytoH)5bs4Cy}h6_oArye>E?uU99|rTlvT zKxgNlu)Av$r*|p6{k-)L_MfAiUk27AejA&7Fm#^%?=(J$dF<8VjfmexoeP%!fll@x zgcP37Db+3wyhtMG&77Iw!t?|P%YTn>9vWs|F zBzJe>4pi+$`V%_oPweZ4A7Z}#gLp@5*6YyO`7`0?zQk#1lh2U;#Cv)u&yW)Xi8Dvp zP1X-O_NR3{`+4dR;zdyBM`ir;w*MkdqP)J%1BhQoor{-z=sfpdmvL6)pK>%EP5i?) z_aH%scGNthPl2BKbM8jJ!8>Ls_w5Yhi95|^J^`IrP_-n8eFh<7-RKHfryc7(hJ zIF&a|CC+@C`z4_RXQen-onXgI;`Fc?N1y`-_xGYayT{EX&QhCuVWBfQ)^^yh&qCt7 zjP+l}5r={N8OoWz^HSnxiKw?fbav&OUwJEW(!ABL1H9`mA>Yr$I=N2Noy4!zNxx2p zj@zBn@;*-44nf4r{fC}s_dX8x^}voN&+H}Ms*U<}H*{z>ZN)sZeA$Drp zpEt_$xa42NxsXD?zJm^&a$?=RC}S{jQkBuy3p#MtHsE_h$BaEooZ1ET9O%H=oa+sJ z7mNAu0&zBX(Q}|PIWD7F75+}rp!pTz^i}?n<1BQTUqoNMYedw5Q$Z+SXpvtlapO91 zTi?`|4?1w*2ly{rt=q)Oq|P0o9C&vMI&g}K`$NiizE7M;SM}wA4xHuUe)Z8&9ueom zHa!PAljAgoi2Qu%IZufbSDkm0<1}=#A3qlP(M0)P5^vXAvs{t~9rGN9T)$_(_=b3! z)cJR5cj(M|x{M*Gd3|%X-v{D6RQH(5_5~fr-_?aDbUa^ngL@wLqAlki!)N+J+-1eh zel6<@9p@jd!Li^j1h>RT@SB=PxRaQfBF#TrOm!-=uE#`?RmCdRN|bl`M%)YIB4BxbUa;5 z;&1p%Z%^;`8CmZWoc1{m@#CE``O=uQwu?*Une7sn=_4juf1d@wjgisT?pUv<7owioEYafy2b z1HR@aPAhfpMRK4sIZormIR2h=BPbtn>Zx;T(oWFv_av9``2er$ea99i-d1()P{wiS z5XYx{UEh7l-Qy~bJB(A2c>Xc$LUH1T%cgJF(7|q4FTnocr<5kn%#ZqU89H!e96g_+ z9C4n8(w7H1aJq^4Yux!2iIX**o&y~?C?Dz-Hj|S$?QQO3fzISOjbeUh={|v1d#e*C znmW%W+XHmJ8ZWo4s7<_8>ih1$>*sz(^yBQrufN>qiu(}OzOCKxR@}|*euub&c2#s{d*ZLJqVF%zp?zX}0VmUkPQ=-+)}u0B zK?e@<7~l-A+l@FG*6Pax9ol&vvCnVOoj$}#sP4g%et^!LUg5ipb)J!;*#P1-FwAnv zcIfRq2z3~}-wHOnde1D#x_U>_EEpGJ-+-o&qZ`$32HC+~l087`1G3v9+A=)n0h zk=HwWQcWk$(e(Pb4;?sT#J#5T&(9)GyH)ynK?hC^vF<&)b{=sq?a*_eGdV6J-fu`>k}1^&r|(Q~$0OX6 z0(+*&d!4u~W0_o8PUz4*;2-3p!8^0z7V-9|dmiQd2s&^^iFk1*_dViludVOr(815A zlJR|t&OHx_li%k48|Z%3zj00s^{qel3GsWVbDFaMLFbw8GPXwL{`b7l3*sMIruRE^ z@cYImujXfZ`^=p`!}kZ2``(q;#BF7BpFMQ&Q-sYt{u}+?6Q}wGy`P|ipAe5wo|?r! 
z6KCNIlOyd2oo72*<8avI--*BKy`Jw~9Jc1qid+v}lDn3;Fs#Tq7ug^g8n-pbnq|i2+q^cF^RLs=AM4&Jp0#a zxWv3_VWqglFCNM)r;KCJ$vB4n$|&!(Yzc`s&&zYi$diC`IaE^O6bRD$6FT&>T64Ld zRd}9)IDP-rbD%RhPNPf&K411AC=GGKuQfT+PSCwscIczyaWpl4SjW-7uVo;fkK)OG z>+Lw2M(kf%x*`klOR2nwEH8A@uh{nme?07vgLvad>Fo#|{DHg`IKy-0A z<(Qoh#snQ&y2?E+;XWzwt2j#$zoZ)PWZZ%d{=q&WaJrRn5a*q`k3n*vLw~`2GT_9I zT9G*AXX@<--LLi+jn3#KUaT2Yr`R&X|0Sh!bdYFBo+2YeVtAUf(ZGh~uOBuZ%0u znRY_n%w?n(_kyi?(SmqW)xAn`ehwYxX;lg*%>U(L$=xU?=Gmxk%&o178)KN>pU_Ev z;(liEqV;Z1ygyYumHvbde!zY{a26HrOq?K__y?WIv94d6gziq9H#YIlyZ&H(-{{2Q zKZu`luD-u{H$S)LUm}k0OZ@ta^n4i)3o1w540u}RO3_X3c!+fo?7XetK;o~riHFd^ zU&w!ebICD;IO$Z}k$#5`{=$9#;3SO}K%5S<_4S3$^E}SaxF_z13eGW__&4V1`Otah zJB(U!c|A4r+c@IqwE1oc9qftm26p)JWfF0s^fk*V?Fk(?7;nJowtFgZ&bH8Vpffp6 zBZl}6uyD>y;&g9ka%4P%j^mkiy;QyaT;gR^<&xvPxAT&W)A+rFo{1I`zg0(ld76YmE}MyL2yWmd`q#_~brFymo(^JlSr&8&6!uL$Oc4 z{^29U->>%P$#EV!jPw0V?>#)Z@L82-x9*o}`t>j3c8hHG2U%{pU#f%}M;dRBmyj>kSnocrJP9O&RD#0hZD&b~;TP%1wv>kFOd zIB#tSLy}!3{$7@d0T9pcoXa*paaKO z>|;B3={|9u@79ka(1C+<^X~GjdPJO_*Yq6dz=Qq{FVNErNc-m>>vJ(QS;|~x#AEiP$;?$|2_a}7lXVYoiZy!ggL>zy0&Qtmmy7z^fhEDcjgZn<)^Wxo> z^|zeFd#26-OCEHp`5LGv?#+X}qHe4~yn;6Mgbp0+Qvs((cNcMrsQbF4-Jt^q_m+TD ztYv-TJglm>J9OaSJ~416RQDrJf#P}&bSB4XoQ}rt@3i=>8F3!ij6cvxd&}=(1>d$L z-qZB@azTgsVcvuKRbAVLIL~a}XN3;+D<U}cO7~*Ejt8a(U zNq=Cy4c@~KT^oVOdN6KBjteO>~(U#-)z zo(Dh2$l1gnvR==J&NDycU1Q<*Esf_BfB71dFZ~W3{NDTP#5x}zEVrGPoy)tJxVZ0K z_H#KeE3Fzp?EA|ju6Y^ko>w4$2fI{#xs3RE)qU-XLO#oMb}NlOK!8gZ(i-11Hq6-NY$(O3#4~{SbL6aPoBCPn?}v*IK#HntRKY-3T_4)0*7yee~VXXJjrayd{ zc>8Vck>Git%XoE+$Gz73uM=mYy0=5dKj;wujx;*e@9voG?)Vfa@@K>P-6rlZHP4ZD zgAVpbJ_q)%l=D7uhS<~-I&kov0-QDv|0YhFwB|T0%L5(!hJ}v0%`b(&?(TVf5rZ@e?I&SnT z5}P<(lbRe^U+=~xr{SE%??Qj{iBJ3uD$Yp1L&y8qtoH*x4oXD4Rgul|N}k>QfcWkU ze=O~mjCnTsXXxM$SoCG_@#j^}~abDu?@WhLGM)t+T}<+;yFYJxDl%HE(N%U1@$t{=qr^9Jj35`R=| zyKcIs@Fphvzsz`m}l&_@kPtchhKjY-l6SVKc2<=Clp=$h* z_Ji(M>ly4XM>#J&XiEG6_04ihK6Jn0Zx-(iZd~1x_|;XuSMs65IA2;hDmZ$}Locr# zaK|mgb(DWuU>o93ucfy$bnqAQIpEx^+krTjRlApdhYtS2{yK14Ch0<)59<4ttS@w) z?P_6CtjQY2F-j8^>6i?a_ zI{0IoxM#5J$3et7mr3te=-`iShq?cpsXvT3rK0QGH*}u;;V_OpPetQOqsk+RU*@;f@+Zy%nkl$vK---Vy|Y65ZBZ`0=!?BXNN zwZp&l7EUJK6UCGMg%1A3x&@q*Wv3IT%2B=Dpo4$QiTF6^)hyy1zHM@3e}m4mfBg*P zXHm{}$LA5hip_aA=sfeC2JTq{zwpIH#Gn33?;q%7|3f?iuTStY;&nTxw<~n;55^&I z?yguxoKSo89O%Ho`8IIcj$22Z@HXcsp#vxRqNS7vO|)qfao()fmj^nN<1!YAJi`vZ zZN&Lo-J>M?8Fc7pb;sAdePv7~m1nTNUsv$mPU5HTQ;ejUCW!EVRK?IvDpn|=lz z?3O^x>r1rVPn@eZ?|DLJa;)dq&ZR$0oHD^?{bWD$cAkKBF6tX}>KO5Fs(o;h51r?J z=4W6&0siIQCyD>yrM~>oVIG2aC+ZAKTr7O?weEQc+CBJJGn^s5BfNgRf)4(|c?59s zzdKJH-voLNbnqAK0#32jmx)tKt;7DUJuyEAKSzt}#GjznL6Q&M|JPr=Q{E>2Yn%I@ zrN6SP0GV&r^Y70K4tDzs`)pDED@pGY|GUk42|D$5g?Ea<*)`s%{d1mm zjZOX%x?kB-wugyZ!xI0kO}+=ZU-26^;ygy%z7dJPSnX$!?EyOYD~uA%T5SM@DADnwby%UsAMEoK)`%R&Pf08Qyq#5$pt=4^h zyl0Gj1^D57lMz3(&3;qp;2*4`!C4<86>)m0{if1C(AoKC&fT=ciC)MY_q_EF`Webq zabZT{9Z-28*}kFk>>sBwM%<(Dp?_B5zqHBkLZ{qfz>YV>II^a0PU5Auc^?BhaPVF@ z>YXT2UgB(5d2Q(r=uD0^Z>C)5Dqrvx21xVIeT$$!sDoUS(Q3_5Ud&J&#ei)s+3n9Vo}9XQKGyKmdVMVy#6`?#P3 z2X;q!hBT^AoPp{+CfV+xGdWJ<=-P8MPK?XhlsE@$zH39r?b5nG;ZUoV#OvnNuUmP4 zg2VVA?t>iHuMKe?rq*+yGwY{W2W9C%oEqW0TL+a<0nD%7{L;Q*=D6o+uqXTw`E+OE zAB>|fCv@;9&cB1xp;vd}OvtR~K!^I`JtuI&rtd|Z0_r|^8ONae)i|*IK?chIT~5)D z_)V&t<&<#;x=_Ho2J(_PPl&TPIv_MhxH z#QT5d^qEAw=>1Ke%nLz>ywL15vu<8bIoh3ny(!*bXkTn9aSIOBmlHbG&D`xHzVK_a z{4}{fj5^J_MgV3y7Ob z*-@4cI?-%Hp9TD4_L2&v!wYB z;zUyYU-|($&+*gx-fYmoAmZ1{WR_F%y?t*M`-M>cH`(_Qzh6W>A3Cg~koUWqtbN*j z#n!vWJFH{CA9VI0@dqc++Zj6etGw7ZF{|%U;+)Q|=RgO4q5Y#g7c-q8j-#}`zR-F0 zSIBwp^-n1epYM7w@e|mzE9lH}It_oZe#$rb9C7xldo$#?37w3KcpnJ$4&Uz*@%nbr 
z+YdVUAyCXmR~5WQoNq1l9OzKL`c1Oaydc}Vo5b0y-j9&=gHGjj<3v95;;9DGllCu8;f%DeS&$ap8dV@4VEnfR{<>iN)l z_OH{x`+~5;=1Sj*-__=REa+r^J0Sd%HF_A=&-u?h${y0L(7`|OJIZzDOnBmSnCwk^ z@iWlQ!D%|phd3!_>Fo!dXZ!gXH^qHR75t(S|J-~%AG%-hkq1Zl+g6Fm{3UumbQq`d zD@Wa5^E&d0evRGZ6wX_KKO=rz;;&Qpu1SAE2Y=x=IEfP`Bu*=J&!1m@AoA}dJedTxso#+1I zG@^#*b22yj<|F=gWd}LFLMP*yY}c7f6e3>jDSEp?2mjzZEb1K?p%`(NsW>Ft3v?#O zY1GZi`&HMRC`p`-3(WG!`a#F?9jBBfUah5i9&~IkmvLL{R~mMNjdnR#>=)dbvL-upVV!|?;*yl(3JS6 z{Y<{49VwkW7Z4!cr;LB31@Y32*7Km_cH}hhehccKV|r`iW!S3cLB~9Yfqn#Dl+5jk zcSXHN;<^7>Ics-zB2MOuX1P3btltUW*6c={4(k0e83(;xKV#o0>O1INPvV!3YxYCQ zhwfMHC*g7aP8?%SAL3`trjHxYJ$yFppZ7lyAF)nCc|TSfK)g34O`fzXbl@P*3(m6d z{=}(QQ_q18{SS5q=hfF?#A&SZHqw63dA6VR+`-iCBZ(haMPE*N?f~!Fpqw*0jU|qM z9lal*ll=twcJM}JoJhQ@YW*$!03G~*`3X3=?oB4ndz49;S5r2V1u9Dkk0i_g4H@Hu~q`0)#wd^!Jt zPWCgzEtEI-_F3XRxA`s!9qfmFpWp;!y+oYhvGw+Y4xBNe`MmGyPgjZ4kaZ0?SFe%4f;_UpQpU*)j=bQfGe9Ff24~Q2SPwzkIr2j^V ze)RI`W8x*xs<%6Iuow10!G9IbKPOJlD0&Wb;9y<@&VVhiiSs&?o&y~?ev|p0=z~Mw z6X&bVJGao89BZC4W3$i18U4xhpX`s`tw%!c{}=o3pQQax{8n#Fz8p`X!+08}X6g+$ zLmyS&9j)h>{x}!9&d>SCh@bWC&h8u&zN^45eY%Gy-hjw@zd&c&(`EQ>;`#H%+CId2 zqxNmc`a*~LHaRo@(Tgbf&gwKakBm#-7s@t?O5FX4&GN~(1|9EHaT;mFJ>vn%V-jz- zYKM{so#bJk8|-(Ya9rZWRo~qt4?6C*E+a)1j^i&~35i$gp1$2chkG5zmfn5v!`gkS z-8c;NN0hT(x}?OdxzFUv@fkY!8Q;ahNt`Stamw!3mk&DlIsDW9-=f9;;paZ#`8!gp z%V~(~yI$X}p<_EbjI|B!(Y(nuFaz->sdpA-eW63VsV4TRIAdoa&ZuC$-Jk;paSV0~ zzcM><2ENpDpaTc_O>heI%1xXvQT6>5I+GJ}-_l4vXH>9sVd7+uuJ2dSvAsjicZqx8 z2P`W^yk6g<{3DV506M-0-eF+9gL)R8A&p6AsL1M@!c{lhmWeuBsPdP4{MMHSzjgU_`hPTN@e{tq2E-Go2m zHEBznq_OoJ=)l1|5A_=Stpjl`=h1VZGdb4%OKDbjAx?kw9))bb-tEJ%<~i<^>OuT| zjm&b&xT4Q} z0OEAGpsz1qlq8&h@S7=J}K+FoR|E^6Mx5Hy*=f3PMqI?9k!JVB+j`_ zdJc5(3(kv!Q#isj;vC$r=RgMz?kxl7{DGOoNo(U@=uA$?ykPrY+82Ip=v?B2`O7Si z>~GM?`EkZ}{9WdJ`Gv$=eOAwdj@QY4#^`%IKW<)ZDe-P!HhHqYK^J|~f(zgK8wTc~ z#Mf}XD~Uf{wLjTUpo2efUp@Ts>dYGAj5@COD|9Bux=uYdZ3A&O zpVHUYyLGC=7;~HRixXOGA%2H*`f@^tc60qH`{l%>9mFZ4;(}~9(DAr#z0aX%y&&Rk zTxFI^^6c(&KpcZVHpkmXygq7uA^i#+{EFY;ypM2*IK6Dv70}uFb?2d@#96NDC)=@i zezoSMbL~7q{5G5Q?Z&$}ZGB(MclK%Ge_y0;H}ZWgtZ!h?*RJ!#nQ8N$Jal$`Nf+}n zandihrypFW? 
zp9MZ&i05}rZztJ*U^m#Q=jrdn3BIN0KnFWz5Z}YL9ST$T=e%O<`+5#^CdV3gj!ud| zoWyE8k#Wu2xP$dQ>icDkFY(XVj91W^<#ZX&<9rULj4K*(I_)*kcVv2+9HRkL4>OT64_J|^P_bnq|M1F+khISGkV zNzKC~2Rir{`$)lwGchS~rrFHDpz}QccNlZUe56&~l*I3SRNoGvGt22TN{Kv8jihOb z6ZWLu570?JV4Og`o8QhrJhW?RKj`2Gy!Q#t{3Th4Q}~L$e$c@W=r7>pT$qD6C)IbX zf7{OriE+!nT^{0>x0$~}hjQYaIm%gXY(e6jReq4=gwAt&v7WD3+N>z?b0yLH+3tJ= z^4%zJ7N3&DyO==lAL!s`e7^+eQ}MFIIhmq;q)Bn;0M`WsjD&^&QTe!=82e)*-IbrMD+^@C)Xj;2cWbfH<)`=sD1t9H)^@+@D+Qv>$Pr`|Irn zot#&o|DjyVhcqMJy-E6VL1&iBWrP*`fo2zOMVwd*Opdfabg=)*(@!3j-aFnsKH|J5 z$`|ml4RN!ne3l#^p_6eL^BC}ao^~K!<8Au-LI;0d5a&cktnEUa9?SF`=)l2!ZYWRs z&OL||pz0^b3Fu1wQ^AOa$+OOEPA7hXjwWCFQ-9C8 z^*h1nh_i{;uCKnl-hC%<8Lh>+_7VH$5kGr?$(Qj8I>f6vucwA@HF>3bzs0&LJpX<@ zcoA`TPSo20I*wP?cb$JLy^MHaX6Wr8&p)}p^T*$}qQRse-}%x0(Z1%FT}AxE^G&{t zKhW8=)BaJ{5hvX!J%`(=)3`6*)hv2+BXPow*V_*|(|*{eYW+S?bKq9u#j~-Wci#uB z`PTXQcM`wG61{)C%gZ^8w2^pT)4tqp;@`CyAE1MO5Le*mxY70#r-f>l($CO=GfKqS zT?Y>lXWCS~pP@53P9wkAj~uZ17;!?We4vaM&`Cez`z-3WqSHy@m7lEl19YC}t1ctj z`0y_?bukR^3obZA{J-a#d>JpG``=wAKmVC_a2ds>@V<&NjV=&(NOwIKIz886$n&EW zqg^3xPn+=vI{165$n&(@eVsV1$LakI9sHfV7VocF)%7-U9?mg2(%;bWyfx&WEV2Km zPUic>yJwT{hYr4g9|no>pi+WI#LK3>t4aSthw-52?+r7xUU6292YyCcv0vll#izu5 zveK-#9Cx9^ehs|mlCI;_RZ|9ycYl9E{6xE4HR&bsb8XW52Rit@b>&eOe8QgsS&* z{&L7jz1^TgeUYz${nx(>K{A}?l7V)R3@$KL8F%Lrd1GmH@etPx2 zLh_+Q|0}Kn#>o~DYj%9PR`qX(f&KsBU+9*I_=jx1qeBP(j~(;F`suI_!z8_0_L5F(cTqNrG>_`pb#Ib32(3u>kQRo=gv)~99an7mn zTh7yjxd`hxg9FNz&SnIN#OxRLOx3_1n0R z&4`mp^>bN2=w#do)p7*AL-gutOX7vK*^dRCSuU4xQp`WUtZYM^5o*7^?C;Q_eTBNY zGg{hlL*4rgF^<7*vD$YaZWi@jM)r5;F#gq16~8jLTcK(VHoM0^kDk%{ z7dkutz8cwsI5p4fIqY9Q1=JI2R(@lLBkoJPktly9MDEtK8v@jEh*K(a%KiQt4^Xy+gql;K~Yz+z|e*9f} zK6IY>PGe_fj<4mX&LDo)LwbKhC;bh3!yd2d&LLjFmTCeD;ndJc4GH~!+DxT{&06Q_D3y`7+w{S5bPpnl&ktRmifm1mLd5IVMd z$i8r~Z{yC8b;RpFLSH`UnCCF^i#*HNrJIOXaID^L(82!rPKA2z9=MG-DQ&(NKnME| z68o2Cmfl4ipO*UiLC5yDzSlSY`EKGRXlL?dzxD3DK8F!S3@EAI1~d zZ)}Cb#K|;4?+56>K|BQ~-}ht0DP*(GfewDCA=dNNwwxr6|608tpyT)%@||V}$Der9 z&k%2_+K(#x0d&!(?A{kGSuywbMy&Hu&+WA@5bvFuze>MC=Q&B|Wn+T|uOZ}}_oGvX|~t#6mm$@qtSB<%Ml_bcMf zJ*_VnbnpXygLCcUTjGpa;SI-W%%05sr2fW_#L2Ga8M2>1$NkQl=ep4GEAgVMe)sSC ziNlB__N`XQ8LIxz`OHshAE9i=(4qa|oFMF1=xtcyG*@{jSx)G%UdX5%oaym|IF(i2O|A!_!+NlLr&a9+-k#)M4=xwqBW^@a zOx$@k@fte#7x&23tFkFYjI(Fl@f71A{4(@NGUAW1Iqv`+{2M5~Gxi#uia35YaTz-J zcXxgM?sU3SI^slAaas0b=;S;G;}+_dB5x+*)mH0t**~E3JljFFlC4z{)Aa?$%hX8 z?Kc%@E+l&wVPchu?*8T!-|ea%Do6a|yY+sC4t_@eLpeVVtwfv;HvJ7cJA2k}R3%Oo z)!$@&y|bs)@0)#V691G|_*eMbkoh*Lw&gJfKQPR4!29q^VfNzITjHOznU_L`_P0okZx3U4Bu*Eb@eMlkw;amB*Pk`MGV;$0?*8^ntkWBW z>Pq}0oAv&I4t~Zu5A`h>)Pp!nZ1#^rhx%f_8#u%N=uMnMYQ7=+8+4w>BkR1iN#6d% zKW4N3@^-w#cOR60dQyMl53m{Upo2Z}Jr$g&w}uj@rp>$qI&g4q0-Q&4Mi8f%8sB9< zh7KH@lLqIT-x%V&IHqqW(80g3BRCmSPaw{?Gx~l9ooD|#4PUXJ{CB^}#LuG6=g2q+ z9nX&(#)+Cd&M!+pop@nv)?d)Ue(($Gef7>P;`pr5+YdT$&|kpWd|)1N`rGXHhR)6p zDTXW}PGy_Cvv+>5_Ol5KmJxr)mjB{smw|gXVTYGZRuTWc+UGCV6VPEjk#}CTmWgI! 
z|EQmV`=!BsAAcQj^Uczi8#=^W>~H!~#rbak#wG6cqFm2kKDCkfCDnI+*&m@pJ4i0# z_Qa7}iPOR+ZbJtS@*t@1YsXIFWU)D~3Y}eh`j}@oaTcm`Ycg(o*Pg8Fh{*Bw6Tg^> z+tO}w9f5uAsBeP{hltbFW?uw!up9alIOiuFBaX|aJkY^zm?wf$w8lx|>{0VnX*cLR z$3yEqQU~IkA%0$)Jf1v1wX_!R+nsUwJaOi#d8#ZYbZ5Jndi1-~oucQD)}pL?9gp<^ z>~Q1IW#YHBnU6t-euwY6u}1}U>Rx%3d!ExxtiR8Wxla5GYTY8+4|MQ5exsbbtKKHg zP&Iye=2+*S4SL)sPM3@NIOE;?)5@PvI0w5v-}aJt)os>&(7`|WegsZ}UT=vr{D^*>g$|sJRe2x8nMR+9Gb32ffe!t9 zax#98{Zy{+#F>*(?`P@2t*)`ual$e_s@wAyb^hdD%+Ofe!x0d>8et zQLP4XepBy0N`FJ=+27WGI0hZ;iSO&M z!=qe&#Ibva13GZ9KL(sPpPLdVz~+4x=uD2&2p0P}vYu&4oQ-8o|H^m^9miYi_xz75 z+7PdHDSdn4^{C5OUV`Uc;~I7#PK9D7NA^4Db|(z{XWy?;tZ?e_lgIvv`r@5L*ezSM zF2voT`m?MrbaI{qzkpYFe|O?l^3&HBI{0(fD$Y}ln%#>yz1r(J(1G(*?00Kbw;yqq z*}O{(oyoE0e|E$fM4US6-DTOIyc^e?#x}8Ee$qCt2@GTGRuMVAJJ#jkaH0NJ34nddRpYQ85xz_%5=g=nN&S+zD|D9j0 z{EJ<;5kHsOfB0|t4H|L0ikV~=@%N3_mmfOJU(mlVo&E5r?BN^kJS+SO{~cSnoA`It zd2Bh}LWg#MehJQkM*E2~!{)mObl~7z3OFm@93oCrwT_VGfzISOjS^zKoiyqgadN*k z?FPFT-@gCz2OW>M*7t7HWIjo}wK4Va!Q1z4anCdAz3j|s;%8Ft=*V^i9sFEU1?;5L z-`~&p?2y~f_)ZM|y5;AIKcu)mZa@eB;JZIK1MA-+&N7?t)X;&mTkJ>d5%7RG=Qrx( z26W)m7W=N6IGz$GAc~#?oyqYta328b)q492;v5+e`5((l`$Ok>-fYdwC%XKa_`_G2 zeA%vlm6!hz@jY=Ghtt@rtLH#xa-7CZ zalR&bkkRnx{9G}ccOjtT?~&HHyf9T*;w31dkDK1kt8gC??6+uY1ma)rrH?Pr@x02< zz&-@<+B{DFJo!I_sd4RNa3jF-@XgZvXX$F8I& z&Z-Y){bYHdgI}HEeU}w|G83moXmgyA<1TdkzKheq`zk0`(q!3*7dw%j2OY-+YaAYR zC>Qa<+@8}YCeCxE=vRh#Mb-P6vOPeTyli(NpR*CpsZQfml4*o#D z0O#I2o-usk(g$^92IFHq;X)WSBwpk}bXL780yIm>k5@$+wz1_Ua+c}LV;$6wV zPchtp@Torf5E90 z@H=swW%L~A;EzLxPtf`C3su??XM0AyKcMqG-*g(-$BuID`rMKDM{M4yhEB#!+$RoR z!sT6w7gR!DZ|Gn@tOLMFRlg^3{A=hr(1C;dZ@|eKvk!5~HPdsT0|)nufU_ao0OI)A zyw?n!$#EJ5g3i!;D63!j6X%1s(4oEHzEhN^TfrT~Ih{-2UZC?FkFD?Lj9mFA@#Cp`SY>_X`#E?I6y-cq zaW8Scse5*0oPo}4cbHE(4CHsfD}Md}@hYqHIkH^P!4K%i;0$VZgg9Aj_Ip7G4)#HV z^X=1d;_Os;J6Rs+OisvsXyTsn!Sl`$XRFQr2Ix2*JB)Byp3ys@m3m(y-nyOob`Kre z*`H#6U{B*3aY}F1_jBmv{=g;AM%?SOD39&;?t8~?5`XShJzswB#ygR}O-!1)x&Mzm z!hG@ltW>SL#1Foqw*z$WGxq7a{e1laaT0~n_gm<|!Tv08ig$QSoaL$Y9O%$q&WU_+ z%-qk3Grp9`k^KrfvtRv)=NLy&uEeii5iePJeLR8Ab3Abx=_+zO-x2tZ_#dk2%L|>1 z1Ea>K+|HTThxj`ao8^>zZ^!LZadP=F0dF9XP2)c^VH$Nt{uY_2q#Mc7vZ#o<;G~66b4xKAuA7x&1f|+;;^2*U1@( zzj>j_mvINWU&WngiL(%I-En<+y^A~6d;T^q%uf6gFZFiib6eJXbozG6O`P~~%yup7 zZFi3j;s@+FI9@*DMTn>GC(xnY;5Rs*w-qFge=ise6 z#Ce;;p=gJ(=mN*w%={eA$ zzAuh&p07x|j>LJT=0mc+(0T5+4g>G;z@C34=}P>2-Sp*z&Mc?P7%txNsdc;uaYhU= zIdcCEbj{R+@@m87`Ae;dWxF1E-?uk$Yfm=0vi@>CQWpHs$LiewIMBVX0sDwh|NeFR z6Mv%3{uSupZ{!ug*&fTEI8W#3?Fb$Gjr#_{IhKDIaV9O%*B3g^{10mF)mJw1WXlN~fFq8uyX9j2o4B9($ng1meC9(6=w> zoXY>XpCxZTVe)>r|8ajZ?ENI$Wa8$sx#t%;_#fwBz-e-GA#r|Fev|PHI&hG$0Vk;A zQsTs|sGlD}2M+efgY$6t3gUeFUC)6I?Gfkw!HJuF4RPYy-z(GClH@_78baOYEcgd)Xo4rzm0aO4zbG)w}IL18=dAg;5M z$(3;gIyF42@AnQvuA}FS{6f4=Q}uR(4)yij#O1o%$ng6)4>)|azP`|*zLht+{1-I& zUc}^DzYnJP9)`HfSDIYeAFyuo+#js-w1=}I5Wo5%J>T27k8?6;Pi;&362H@JlP~=R zo%9#>gMoKEQ8eN`{AluI`-Bed9N*W$DRVp~aZbP0bD)ENU)1}h;y*K$&u|2 zI)1;`+E3O-&O*E>MfLXc&d*K*_s*i;(R$|~e&aIw@Va}ehbRd30G4jjxIz=;*6B5~R$ z(w7H1lVfei-5*vV&Kwm_{$0GX?gwhTsygv+sQOC2xBG$m%X5PLY7;+39DO^2PL3xS z$6?pLjq4F_N^HGdp@V-gKL@9LsYb*p=&R>I2M*#kIFFo7i4z=3&w&np#(Wr@Wl>rZ zC;eTs-%39~=XqSRp4VKttu^siJ~R0;zS^DF#P>Cnw?@nM#2Xx1UvKDOKjgu|NtB>7 zar&tCA^RJ2upiJdiy~K`yptOg-wq1Cv-9{VV;S4mrgy2 zc-PD7%LN_$0Kb59_udrZw5XuxKxgNNbxUUuXFzq6^Y8p%ov&?QK8N^U>gw&MpRZZh z`ODWVAl~lc`g+TCetEIqWXZWD#95F|&w&npm?rikJ$$f&IKejaG3dZS-W2{kc5n@G zI$zVz$DlJgP6PK=gY)OC4a7-sULU`qGutcn5xI;BV!a36 zJ-#%U>(0mF+!)F^w95|S`s_BjvfV+4bxT9l!A|(}98qNFT$N977}&23e#EXp#6PxE zZ%^pp7u+iaj!&t5#L2%%&wjdi}QX3vt>`(Q}}K{Si073De7H{BvG3{|LSPq4R8iYyaC`C@k@J zj{Prur_nMz=K=DDib(wXlTE(tf6&Q!U0yMt*mlsDczI^&?Ft?IgLxk8ntOCK;#`}r 
z=Rjw2oJOV6{9fdwnz4uzcd5R9&=s#)F5q188a3hX_K~L3d9fYI;t{X9P5g(hXD&03 zo;B&Be>g98oA;%de3$V>+`qOrW+LJ*QSo2SGoZsfBXq#LrjIk@-h4k}yNHXYuO}t$ zxY?#1 z9o*dse(~VJ-AQnF*Wec1-QC^Y-Q5Ns=HtE7E3o$IzTE%3Z>pwlQN{Yvy?4v*-EzLp z&qJK?hBy#9_y^-3%9EmP0pes*_Jxy;<&+;42@8ac*TY~sY zReP7^m+wEvR{iPkkwNjIB>4FqC&UY|&)eHR#NV#QWtr!J4*t3&=F7{A%MoX-vV-J6 z2Y(@diSlG=?;=jm*?N0I=iXmFmiPPPlrK7&t19u|7}_Uvs4vbpqnxkO`x0lI8t-L& zp#ukT7C4DM)+SB}Lwo=oIIV^KT|4R%XX8|Tzk|-^xU6X6J%I588WAU&vcDW}ptGA9 z*0V0FuHX$Q+l+Wk=G*0xJm{F`Yt0ki&wduO74fbsyUTeCI?P*}QY9U}zRJwt^)cR6 zf**$e-Ill;4C6R-n78VxepcaO{KHq%M-Cnre)Wyh!;hbOU@Ub%|}1-U0*nU`0z21_~q0(MEV0d=?}zZXwUs#P9BF=Hc`^M1O9A{k6I`IPH3@>b#=im9k zxn3K4YBBL|x%7Nb;~M1sV26GimJ>gepPmmL#>1k@;U8LcC>UYfw&3v)b_YLfo7Kc0 zQOz@dVSEHYKHehpfmZs>eg+a;O-tLiC0?1MUn>{kFPE({&Ai^UbZ+(ys?J(2)Z-{`CTyN z5n#7$SuYar)>ys$p@aRgj}A`mk5`D3AW+YN&gQtRiXvX?vi$~eVocY!FX-&{^)vq4 zA?{hX7;uMpFV*@*+8;XR`C8q@eTBVc9}w@US{KUr4?4tu8A2v(7%_E$-Kcj}@jl9x z3{QxgPv!GvIiZvN3hP<;BhJU?#CxvlE#p1t;8)qN*4%$hoMb9~mK^A8j&nXbvf@2) zR;l*!|7$*aljSq*@O)bnwrP&W9=Amn}hf;`CJC6_ECW&i%UA>Hi%UBNE^0 zZkJQ?J)M{E-U#gQ*A6e@-yf*=4|LK$=m+2xT^NIS54-8@3LX4YROE>lZ;Znn!}l7X zgMT`Sd~B(X35YXMy`LoQ2c7$TtdE8FX;I(Lsge-iRZCxA=_=&)~mv|W~b)xD+%`^jJAnHpCvKwN8t z-oDVmPdF!r`oD=;ggCU{bwl16I`{{F24`NzX2i*0*!P6aHX|kIbGI=DjfF@n?8d0H!s-t`lO$slYU0t2X>tjaSZXQd-y#AnZLT# zYdmo#sCTNR{h&j8iMsX$y)(D;TOe_=tGH5fpo1T9ZVmP7J#i{=te$#5K=-@$g7eMb zH=Q_>_+t$Fd(ff2xHk*Vl+i)N8KvSLSzqYD!Mp`d_VNpeQ=+k5U&(rg)skmFVQ|Rn=ioA)h)k~buXfk^>@$Rd4YGwQe9pd=3 z$_AMx)F z*4r04_x;d$z9w_^L&U#7TF;m3u$*dOZ0l3@R;Uh(g8hf{)+m4C_{WI9#*jyZ4*d#w zMR5MOeUdnt4CnBmvpLSZ#i@m7iL=TOr+5}eIQccEUnKrMLtO3|-^Yq6&Lehia+UZW zRU9P8Dd=EN%p0)hxso@DbHQ*<06G(Aa-_S&sjvE#EDv*jjCbOF!3C!u5a*Sumt(wB zI(~=3Wla+A-fsweLcF$yyfbusKKIAHLt*z!bzcy#zG^qJp3uQwcsB-i|C-_rapD@z zi9rVr+BG;$WB)~*1?qf(EDvuz> z;TXRYHh+-S=+`{zMAa_kI1L@f=~e49ox7CltQv=$=ijT83r*aXYW$Gnv^@Xrr^dOH zvGbm)xo>muIQLQP-?xbop7>`B>w4&5N1O+QKN4SyNSrl>b867p9OwD;Nb|jjQ%JQB z*`M_1(_Pkk@g7|ANim3*!*Fg8I_^(?7S6e#p2Hf(A>KH(K9c@`4sk;T<&V2phYkG@ zf9EdPF`mdPP3@R~_|Mh8n(QCYfrI!C#CxH} zU1@jdU@weQ;Ec|Zl{jq-<1KWsSNpGN>0Hv&zjF|0wPF7ky4$mw{#S4Foad7|?8`&E zDF5U6q$pxPKEj9s#Gk3gDd}J6q(89EfgM*>DMCCSgB_uRKLSO6DwCiDajF`|CFo#3 z2|D-jtdHe4iO*wtT`foaIV<%23_8>o?}(zD{tI2i>1=4n(1C+@ zg~936rYdn}8S+5TfrC6XI7MB)#A#{pBXl;$WgQdy=c(e=Ax;59o(np=ePLbcvSJF} z+ne=?m)X!>p<|x&9m$Cc8xe26s;8X)T``h`(gNf`8!GKR&l7PBugQ1)c3z=lQLD-#QcL=2!juT%N5Ho%8>K zdtHg2{+>Q>W}0`3i+pS2em#j-?XsQ+o%9FR`>^B3gnfzU|JLToc?UZ5Gkj+nobB5O z5a&>2{rCYL`dOcbsc7EmJa!0i5~Q{{vb{kk>laJJ*MHX;LA(tq_2q)jE?0AF<|>Y_ zvqv05oJp!&VNn3}U0diTJhS74b)D?}K*ZaDg-?Dt{V=oYZ_axLFa8-%|IXyBc6nud zC-Xu-zU=(p|5|su@V<8uyf@}!VgCVsTCidg@rx(b+XFiI8|Oa3x$ZlSINsUx9O&S0 ztQ*1k6mAxACYQ81(%;Z=T;}|qY~?pW#5++#Z@+*0J=ye27ZCqrS$)0b{DwFe^)A?O z32{!Ua>{-O9sGd#4x9+_R}g2iA^!>;IN8KG^t=~W6Q_haS0Kv+oy~EMr_Uy?C(f%L zdi#4io}TdH`*?p<-%R|JYCRy^3v}q7s5j;n*zZDu?Zg{0Qtwabz`;2pa2gccO`PE> zUY7QQ4jkMM0Ov&b{lt0EN?#u6Y>vwcS%vQvd_H`LI0f3-9BC)$To-#R`}O;e$g`nb zw+A01UjHt7ze49e{`9pXm*R60t`4V&AF7|tm+=d9>6Bx0p4&0GThPz->_`!R56yOt zxVbv(?Fk+H0zaYtU*B9L&REs1-TT*>=ghn2Dsc*^_AAGC&+O=YZz0Tro5X)UO7ADn z_McqVL=k^9YIT?R=X%A(tY-D?eF2oPufBr{3qltL4Wz+D)+hLdophG*v{SDYFQ`tzw>11dp z&>_!PMRmBCg~BXP8EH{)p0A}i=eIp-RN_}R_yaoBSMKA#KNo{IAq@K}(4oGV|54vR z_rxL2W5f9|=-kgQ&V7T=W+Z-)x=$+o10AmeTvqZ>98XNloRxUjm+R*#=+d@)clO+-qdQ?o zmk$&NoSWLC5!L z{4AWqM7g&2Dnh&vi}Y~=bngA@W7QM;fCoyIAbujlIVb2~KfD`)a!!t3nmD7?ev0%1 zbl{+UgR|soIpQ2zW!G17ptCuCRu6F={rVdhak?1#1$4hVkB&S9%K3V0RpQ?>+)IKE z?d6ZuypOVYv@db?8up2y%c2?*>OD}b!)8ydL%aaBZz;zE=;Zqq15!O4QZ>)d_bX() z-D0#K@v9lmCqV~4W1fNi9#?2goJCFaeufSlyax|XsqoE-)6H-`2|D;;NO)eq#@*kF 
zI6c&T3fXU=^P0SSpI4H?D&F{UFI()_yI!>;USUH#1Ral?epV#$Zp`Dgoro8wmVO+7 z4si$0t*;8WIwgGZCc*O>?sLG7t`=R0zs*nY59r{J%VK_~_!9P}I5^G?pu#ECsX?-%H7j`JMKvybD5gM5SRC#Jk0;zX3|#fCuQ_3fkg zGj#6#?DRwRfT_ffZ&-gyKlBiO*phoDaeAq^Qu+Zp_yKVT>bvOgIm9`k)(?^c9sKZ7 z>{Bh-H=j6X4eLqh?C|#IdJF3?l3z&|w^`q2l||;SxqJI`L;-3h$wVKR(KO z;%_tTUqNU4$JZ(?zLz)b{3hazQRio5{h>qs@Aqo8<@K`f#_x12o3V|!kqqbYJnJvc zJn`!$yNKUf?E^~tdd7EI%UygvqkN8i#LuGQMA^=uQ*H}h*CVcgzp|A&M7-jL{c-5f zFR?xYCtvhq#5t_;bFyDTXLJ0lE8<<%F&9n}XQqk^rTw6DKc2a)MPhy7H}x#>yQ=mt z?FSvNFP!H9FW0|FJkvT1%W@FRA4Li`bi_Y$C!{Q>7t z!MjrM1@Y#o^9Is>(7}FQ!VfLNz9CLB!?{lAO!ljM>OFCe7{(J%?PsOq_0#pKpNPNL zFrGli>n9%z`+=}u?6s zbg&!VGlw6qzKl+Z3E$$bi_{`X_wzASV7 znpGn1&7^uRbhVV@XANJRA^+BO!TVEX#k>|VdNtw}$gUq(p@aX?FX8{ccKZ_NPANSH zI+OqFEUZHuS52EE{o`3bbmpO4_56sRxT2o#Ssu!nf6p7EG4VI6_9f?KnSV$Bf<12> zZAP3}E%o+<4t~KsYjElfYDJvC1N0o|z)2{+C)lu9JK{7}?`z0tvQy824)x0*-UV!YWHoUL+}GC+I_@9N`0{z<^~Adp+HPmkub$3p@IUHZsMTiT zUo`Nc``x(uTEzdUqi!eup=Ww~KqtploHs&wyRFzoyhI`O{SG?#8}TDJ2`la+&dP*( z4s`H0_A|h__wgWcqUEqTa^8SW?(e~G;JsOJlz0bA|1bXbv5v>$c<@Zklf++Uh%=yr z{rttbmD7pN5of$1&VUXaw^R?hata_C=cVg%{(1BC(;5 zFzL(<;%qLg=RgMzzH^T9tQ>NOI7!;-Indc0m-R-x>m4Qj1L9m&^P_BE(8>M-yP;g) zRz4=)RKxliI@E8Z@PF(w&xzx;Q(r&mY>x9h=jsBliIer3&5`4qr{_8G_fX%o-`^2` z-jV->@5~cMc>a<2C64OtAoGM+&!C(+Cw?W)lx=zrbSA${YV;3r(x1_Dq+jNWb2H^5 zhHCn2-tw5@NWVb$yK^%knsL51Xl+>HKYpYyCvaitqd^e18o7C>lKqaU(?2e}57>>@PM{9nf#n{^?!f{ro;pL(zY_9E?W% zmr3>Cd4$fc|BrKKBLCNWbu8l4HGJ0yCe;$hS(0PWYAx_i$dV500_HgwC#~^WB3qrE(GHs!Q(=`R;*B^_P~%-p^WB zdwH-wFh0Q_1^475{z;YJk$D8@>~i{8h}Xfn6TL8TE=SdmJJ6xN$SZ&|tw0IlG*tII z<@gAl`?%T1!nzy$pw~XcPf$m{9)Zp-r?2%{^uNQ!%MqtSOPeG6A9Uz{2QI}Ani(he zZqy&=q*158>kl3JUwP&4lgU#jdRNEX|7K>dLj2-y^!4Zd=VOf$`!>0@ zRwquU$a;T6XV=$drEGJBzK1lqaxLQYHpKbR@q7EubzP{?^@x|Vid`;gKdcYjw;z|4 zSiHX{jY&fnD>kOh6?U~7&lSA@E^MnH%o7uE9(y(`XAzx-u;gh=o9y6e1fQxrC#Mx6)fb zPeX_HSYDh*TY6y}aoR?*IkG)Mhx$e?{UF=Mw>XdX<9lf0en`&6fyB)jPhVf?I1l2o zZi{!B5|5fnyo_=6Jm{F`%m-JfJd=3KBHKLKUu3+9bJI;0^=Q%iper~&!T1FK#19)p z{5>&jzKlli)Z^)&UZ6TEnQChnTGal zdN<>Oi~D1e2CIoTMzu@XzoC=;1?T!;$K|os6YpSry&a*0Kd>(bPPr4Ci1RwFo&z2H zfq5I8ZbP>br*AHsBikQz?&m8%>$cdR?oo0V@sk$Q^P&44AN>I3zZPyE@pGwlf-FCD zm~ZeM?-ecYd6)j{P;mbV6zky>M-LLeYc;){p@YAWr$RYT-aks5>~-}V=)egQ`yDMW zpCnGH+?iYgGw$6X&Y?tl`$6|R`*moTnex3c_BM< zPSsD&OVGg&TaNO(&HGQhCr;5kdOtwt-Ve_6=FvxfBK|y;*O23jr~L%)*~1P4%6}vN z*a~_-Lnr$Q#vSm==C+#snjbwp9TDKWOjZEV~$vIM0ge z{R7?a{PR-GFa6d=B>v7adOmdS`94;*c)TyQ!{3Ye#|-&4=wQ#n>o|@`oht@$!j;q8 z6FP8^M}R%MevVC?>{ay~=)l?diuaRp-;7V3H?{Q~=xmO!o(Ql|xIs+`e#s=rh52 zVyshO$1*hv65s2d&6jqB4*o!#0M78jMTwI}t-B=$I-BEM-xPjRf;gjJ=_&e3U(!ZOJ{H%@Q9&b?P^2D!WSf4?Mc)Eo0SG)|qOUjOE5$rFFL&0{=REhY_ z4f{*bnfx_6PBr2@GpwVezn+TuBU_1@#5t$di_)IZxt~9r`P8=M>k>clU447-bbST8 z!=8;(H6Z@_gL*!6@K;RbuYqMB_dA+5FnIlg`!e8fO4Nk-75C};7j*C!;y-Y{zHCmM zScY*EI`l8ZRp3nD+L|~k)c7R*1>Nubg}4Cxmy6pIKlfU_J)v{Y_pzq^!SRIEr8DvC z8uFLW!JarbgmQi@+?6;>)O}bvUO@*A#zAn_W$sCw2(yx|b~d0UbEu#ktzlH6{?JfV$5kInbFnk8)2Y&io$w z{?D8r-+?-og4VOCTTCZTJat}J+8est@dN2y!SG_9y3l+!@g}JA`Ene94&xsFy}9#} zjBnKJZ16mlNaTHDjF?CKENWj(+7CL|4|xOlGr)flaiV$Y?FSt=Q$zCpda)GCh%?7@ zzEJ6Gj&nYmb9g0j`m6J3(r%viQ@{0lX+JVUyS2n`n9i;*>~5*|ZlUA%vYhu1t|i<^ zyzGW^Z}R>D<`MXzYvirO=~P+o2k793T%uj{*uR4~w~OjI(7_Lwk5Hc30egsZs-n%2 zet^#Xc0BF5bvnr9tU)=Ki*phCx`DB;smLBO8Y~H_J(-`ob>U(6DNW?XCd1&biW(-u#N_Q z+078mf6af+Qun1KA3FDZ=XYgi&ksZV^XmN->2FWJD|@KbAH=Jz?w3ou%6HX~UqZb< zWs6Ll3=8z*7Ie0Me619hr_#HoU$4a^PCpfY%6uMl`h1@A+}O=Kaf#b?rT*L$=evBY zFXBD@R8JBTr^gIE2RgJvyte~;HSqT)&V&K_b7Ih8+{3dOfo`W?nWaM}$`O`NbN^c?6=U&IaIJWHIOIOSjK>kFOxe&KvSdiwi}#2*<& zKTmn~ezY?V%s(?L@x3DH+qaAZvCjcJEH9RmICH)A9O&Q|#2?_KeVKXSt(AgZ9 
zbx53(emtuHaYmJ~IdVLK&K~cPzjeOHxur@G;!RWWlpOy&dymt(erOn}1o4x%u*)m? zo{rzh`@*hMj`Wh8^PMPPm ziPKs6UD^#ga9~Gpb}gbjE_iklB*59{t-rrgBr7iJZ8rrG6zY|j2({8@KBXQ~(&c{Lrd*2Y}^E!7A zAkHU)y`h7hCW?C;OY?Ro&W6GI@eVp3?_AbA!RzvOFXDwZ#LLhz&-pH3`@Q{$mrljY zGX9b80xrJsDnU@Be8KB1j8AB1gN6+vZkieTaT7X>Yt_}bR;>4mry+cf1dnUT>w+Jp z>oDSHF}xcC9sG>`0Z!qfqllAgynbAR&gS@8YdRmMII?Njal~mJsIM<{zl%3;J^NuV&J)nN=ew*NVjY%f?G)nUdk(TcL#NynoFB$|3gvwoFoSqUl|AJ62_5|7 z688pr=AT2HQz{QGIncpBm_JaSPod@$XNYPq(tgnW&VJsPIS*L;!y@8mSNU*hKj>s0 zFsgX3w));>#2aX^A9S!E)}yHR?RKk(b3(mODD4LwIPeQNc_OSMPXDF)egd7z4~dp; zB+k)8`hMc6A8@V}<@9R0mG~9Z{-g8*bTZ#DL!8U_T5BiqTC4qS$%9VzivwbQ+Wf~} z;)S}VwhCH)E={DC+XoI8Dv5U0r-y+5FH-!J?u>>GpMrsfIa zR}F2SLy`RsI`@2E>!G;U5xx2u;&=RJmtW3b&>>H_c)?A-z`z{4VV9m_za>n)i^Q$* zP;VFLFn`rm4IupZ8Iw}Q8x}l&A)f)eyw7o!_z_#{+W~ZF2e_XBPNicvh?Ak2o&z1~ zi~a@9o7#7X^I63y(ofL2_mhtmRlJ{e&%8vFj`Q zBXlS0OwPNe{1Nruq4OS8syxq$ch$r5H0W2bUz#MZiL>O3-Ve~(_4BowUgGm*Pfxug z&dOIdN496^La2n4cgk+Ge~35(<;&IkBXMt9`hE!A@BE869Q<9GzY@QT;ob#wXwR50 zz&Y^nJ8{k!?p;6!4)RLi44f0P#jknMoLTkf-=G7hk9fCsYq*HSnNd*BfzIZ*tPEB8 zd!5IQc@bx`+J}_w7CN3sobS}l$`g}#-`3jsE!j@wJ9RT-4hfU5LqI`{$m!{8L(pO`ow*XcRX*&OG2wZ-L= z5hvtRnX1)Pol zWFXFJm2Z>whYlR%1HlR;7NtD6X&yf_e8cY=y&9mvp|uLtDZygE4lj(G(B2_3H(@%=vO{R5rtALl#yOOKQ!&g%y@N7i4y zlRvI`YVV{qf8I;)> zl%g{6bAHwHp+o#vQWZR75Y$?&-+(1s%69 zA8VvI_tI`#bK;FR+&6>{{uwUb75o&?nm9+2>HPzp&2hdfm~KdW;v~(ex1VS4!#Vvk zvr}i{cQu>~_S8S=MO^W`OjqJ(Q}L(t4|LK$xF3rC6Z(4(;%!pvJ~^*J2mc`611If) zKE#=73bnp-C0M7Gn1Bf%#aE=^0_wlvUKSwGIA^tLT?_Bmz&-l*$#sz+X#Ltpg z-%jOz2j&ylp=!OU#3^pr2Zj!Q!Mpq5xc1H=G1?y9A=6KH`&h}&acn>=F ze)%!(MCI?4^&2^l_&p=&=QZd!@8`1KzTopuZBi{F-svKG9(1ywtP$hS(iKaIce0+% zlkFQi_yh9}?D$vSmBd-D@&l3s9XQB4fRo|O8sfAuyrT{s{EBuBPNZ2Ih~sliKR-j~ z-mlJeLY?AUh@bC-p6}T@!Ff;U#nbJ?k8xkmm+>Ip?eU#_dPPquycSG+C#wFLb~27v?$ezkNSO{N*Y? zEyqXbe#gf-81T2vJVpGP`}O6A4)I_<)j>0CeVX-CqpiXI3KV(ab#2cPf8Z^fFZ&mC z@E7_U%6YQnCE`>taG*2!>xlO?;xtfsYH3eT{e{1Wa{lr5Ch^B#)Y}s}_x^HO14Q0_ z;_kb|PpkMc-hfVyqy8cuT-o9w@d{ni`v*GsXOP&>x*F{%aYjGabD)EN+8^P2;0t!W zAkNT``gjmJ_x5w%yKDaDE%E(h>G__`o4%IsktK9Le?^6l#ILH(Map$1bXaHSe_pyy z$`j*_>ujHLUx}MQozs@>R*tLq9$lgD$CISK{d3)h?+2pYb`Bl3<*#|g-pTa-gAVNg z{SBN2YyKe4u&0s#+XQ8QhYp-(o8OZib48C#oG4%Q9O%I5Bi`BgXL?lPgw3Ikx1a+j zyV$oZ79}QeM*8YG(1C;RWx_s@hQ%RH)Zuy#bl`Z4aWzhq1jK2+QO|)69K27A@=TeT zm^e3Y=sD2Y9B03(<(rH+XaBJ|vVD5iublTF8hlAf{L?BPm3+_cLAb0~Vjb6Rc3R@! zQSD!jtI)}D6?q8QHC&lLiPtxezCS?+|KMB)I1l1wAx`@8dJc5(PX=*6w)M&E#2KtO z(tgmnA6I|8OEQV`!{_?vCVq%I`f@^t`eNOM`ugX~Pn_#5^&IHH!Fm{+%83gTXS|=D z106Wne*vfGvSP&PS3}Q%&gQr**WCVej;3I(QpCB|!{*5L1>JwPzy9+-%9UT-3w-po z4Do{0JR`?h=xmO!^-6pftgr+G%?cTvuR=^GF~j>@Y_dqU^F|2n_pbl`ep;#UoC$3?Oo;5$y0 z)Brf|{p=-!Tl_pfkGuuSJzzz1;`UeX_Q`e%9olJUac*&Olh(v(tT?iK(4n2)Yx$y6 z$(_jGJI`5XP1K&a;|%X-d3w$oaXso^ICN*?XUMLPH=%RyH$Q8sIM1=;eiz~oOrz&R zhkjN_`S0H0>ybkz*b>~&aLyU!zqh6b@k^-uob(@b=w~TJyfSBQAL5)d?0-Xteuj7f zRk>wK0t@@u(%ozk6%4ici;2*!Q-Lq zNBc+ZCVuxt`gj34_zQ6e{8c*re&VE5-&K|Vf)4&do)4T0M-LI_+;P1