diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd8470d..7a40971 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,22 +5,22 @@ project(modcma)
# Set the version of C/C++ (here C++17)
set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/src/*.cpp)
list(FILTER SRC_FILES EXCLUDE REGEX ".*interface.cpp$")
-
-add_executable(main ${SRC_FILES})
-#add_subdirectory(${PROJECT_SOURCE_DIR}/../IOHexperimenter ${CMAKE_CURRENT_BINARY_DIR}/ioh)
+add_executable(main ${SRC_FILES})
target_include_directories(main PUBLIC
- ${PROJECT_SOURCE_DIR}/include
+ ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/external
-# ${PROJECT_SOURCE_DIR}/../IOHexperimenter/include
)
-#target_link_libraries(main PUBLIC ioh)
-
if (MSVC)
target_compile_options(main PRIVATE /bigobj)
-endif()
+
+else()
+ target_compile_options(main PRIVATE -march=native)
+endif()
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 38429a2..4acb02f 100644
--- a/README.md
+++ b/README.md
@@ -202,7 +202,7 @@ The CMA-ES Modular package provides various modules, grouped into 13 categories.
| -------- | ------ | ------ | ---- |
| [Matrix Adaptation](#matrix-adaptation) | Covariance | :green_circle: | :green_circle: |
| | Matrix | :red_circle: | :green_circle: |
-| | Seperable | :red_circle: | :green_circle: |
+| | Separable | :red_circle: | :green_circle: |
| | None | :red_circle: | :green_circle: |
| [Active Update](#active-update) | Off/On | :green_circle: | :green_circle: |
| [Elitism](#elitism) | Off/On | :green_circle: | :green_circle: |
@@ -256,7 +256,12 @@ modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.MATRIX
# We can also only perform step-size-adaptation
modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.NONE
# Or use the seperable CMA-ES
-modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.SEPERABLE
+modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.SEPARABLE
+# Other variants:
+modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.CHOLESKY
+modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.CMSA
+modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.COVARIANCE_NO_EIGV
+modules.matrix_adaptation = c_maes.options.MatrixAdaptationType.NATURAL_GRADIENT
```
### Active Update
diff --git a/external/Eigen/version.txt b/external/Eigen/version.txt
new file mode 100644
index 0000000..fbcbf73
--- /dev/null
+++ b/external/Eigen/version.txt
@@ -0,0 +1 @@
+3.4.0
\ No newline at end of file
diff --git a/external/unsupported/CMakeLists.txt b/external/unsupported/CMakeLists.txt
new file mode 100644
index 0000000..34408c0
--- /dev/null
+++ b/external/unsupported/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_subdirectory(Eigen)
+if(EIGEN_BUILD_DOC)
+ add_subdirectory(doc EXCLUDE_FROM_ALL)
+endif()
+if(BUILD_TESTING)
+ if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+ add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+ else()
+ add_subdirectory(test EXCLUDE_FROM_ALL)
+ endif()
+endif()
diff --git a/external/unsupported/Eigen/AdolcForward b/external/unsupported/Eigen/AdolcForward
new file mode 100644
index 0000000..56caeae
--- /dev/null
+++ b/external/unsupported/Eigen/AdolcForward
@@ -0,0 +1,159 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ADLOC_FORWARD
+#define EIGEN_ADLOC_FORWARD
+
+//--------------------------------------------------------------------------------
+//
+// This file provides support for adolc's adouble type in forward mode.
+// ADOL-C is a C++ automatic differentiation library,
+// see https://projects.coin-or.org/ADOL-C for more information.
+//
+// Note that the maximal number of directions is controlled by
+// the preprocessor token NUMBER_DIRECTIONS. The default is 2.
+//
+//--------------------------------------------------------------------------------
+
+#define ADOLC_TAPELESS
+#ifndef NUMBER_DIRECTIONS
+# define NUMBER_DIRECTIONS 2
+#endif
+#include <adolc/adtl.h>
+
+// adolc defines some very stupid macros:
+#if defined(malloc)
+# undef malloc
+#endif
+
+#if defined(calloc)
+# undef calloc
+#endif
+
+#if defined(realloc)
+# undef realloc
+#endif
+
+#include "../../Eigen/Core"
+
+namespace Eigen {
+
+/**
+ * \defgroup AdolcForward_Module Adolc forward module
+ * This module provides support for adolc's adouble type in forward mode.
+ * ADOL-C is a C++ automatic differentiation library,
+ * see https://projects.coin-or.org/ADOL-C for more information.
+ * It mainly consists in:
+ * - a struct Eigen::NumTraits specialization
+ * - overloads of internal::* math function for adtl::adouble type.
+ *
+ * Note that the maximal number of directions is controlled by
+ * the preprocessor token NUMBER_DIRECTIONS. The default is 2.
+ *
+ * \code
+ * #include <unsupported/Eigen/AdolcForward>
+ * \endcode
+ */
+ //@{
+
+} // namespace Eigen
+
+// Eigen requires a few additional functions which must be defined in the same
+// namespace as the custom scalar type's own namespace
+namespace adtl {
+
+inline const adouble& conj(const adouble& x) { return x; }
+inline const adouble& real(const adouble& x) { return x; }
+inline adouble imag(const adouble&) { return 0.; }
+inline adouble abs(const adouble& x) { return fabs(x); }
+inline adouble abs2(const adouble& x) { return x*x; }
+
+inline bool (isinf)(const adouble& x) { return (Eigen::numext::isinf)(x.getValue()); }
+inline bool (isnan)(const adouble& x) { return (Eigen::numext::isnan)(x.getValue()); }
+
+}
+
+namespace Eigen {
+
+template<> struct NumTraits<adtl::adouble>
+  : NumTraits<double>
+{
+ typedef adtl::adouble Real;
+ typedef adtl::adouble NonInteger;
+ typedef adtl::adouble Nested;
+ enum {
+ IsComplex = 0,
+ IsInteger = 0,
+ IsSigned = 1,
+ RequireInitialization = 1,
+ ReadCost = 1,
+ AddCost = 1,
+ MulCost = 1
+ };
+};
+
+template<typename Functor> class AdolcForwardJacobian : public Functor
+{
+ typedef adtl::adouble ActiveScalar;
+public:
+
+ AdolcForwardJacobian() : Functor() {}
+ AdolcForwardJacobian(const Functor& f) : Functor(f) {}
+
+ // forward constructors
+  template<typename T0>
+  AdolcForwardJacobian(const T0& a0) : Functor(a0) {}
+  template<typename T0, typename T1>
+  AdolcForwardJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {}
+  template<typename T0, typename T1, typename T2>
+  AdolcForwardJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {}
+
+ typedef typename Functor::InputType InputType;
+ typedef typename Functor::ValueType ValueType;
+ typedef typename Functor::JacobianType JacobianType;
+
+  typedef Matrix<ActiveScalar, InputType::SizeAtCompileTime, 1> ActiveInput;
+  typedef Matrix<ActiveScalar, ValueType::SizeAtCompileTime, 1> ActiveValue;
+
+ void operator() (const InputType& x, ValueType* v, JacobianType* _jac) const
+ {
+ eigen_assert(v!=0);
+ if (!_jac)
+ {
+ Functor::operator()(x, v);
+ return;
+ }
+
+ JacobianType& jac = *_jac;
+
+    ActiveInput ax = x.template cast<ActiveScalar>();
+ ActiveValue av(jac.rows());
+
+ for (int j=0; j
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ALIGNED_VECTOR3
+#define EIGEN_ALIGNED_VECTOR3
+
+#include "../../Eigen/Geometry"
+
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/**
+ * \defgroup AlignedVector3_Module Aligned vector3 module
+ *
+ * \code
+ * #include <unsupported/Eigen/AlignedVector3>
+ * \endcode
+ */
+ //@{
+
+
+/** \class AlignedVector3
+ *
+ * \brief A vectorization friendly 3D vector
+ *
+ * This class represents a 3D vector internally using a 4D vector
+ * such that vectorization can be seamlessly enabled. Of course,
+ * the same result can be achieved by directly using a 4D vector.
+ * This class makes this process simpler.
+ *
+ */
+// TODO specialize Cwise
+template<typename _Scalar> class AlignedVector3;
+
+namespace internal {
+template<typename _Scalar> struct traits<AlignedVector3<_Scalar> >
+  : traits<Matrix<_Scalar,3,1,0,4,1> >
+{
+};
+}
+
+template<typename _Scalar> class AlignedVector3
+  : public MatrixBase<AlignedVector3<_Scalar> >
+{
+ typedef Matrix<_Scalar,4,1> CoeffType;
+ CoeffType m_coeffs;
+ public:
+
+    typedef MatrixBase<AlignedVector3<_Scalar> > Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3)
+ using Base::operator*;
+
+ inline Index rows() const { return 3; }
+ inline Index cols() const { return 1; }
+
+ Scalar* data() { return m_coeffs.data(); }
+ const Scalar* data() const { return m_coeffs.data(); }
+ Index innerStride() const { return 1; }
+ Index outerStride() const { return 3; }
+
+ inline const Scalar& coeff(Index row, Index col) const
+ { return m_coeffs.coeff(row, col); }
+
+ inline Scalar& coeffRef(Index row, Index col)
+ { return m_coeffs.coeffRef(row, col); }
+
+ inline const Scalar& coeff(Index index) const
+ { return m_coeffs.coeff(index); }
+
+ inline Scalar& coeffRef(Index index)
+ { return m_coeffs.coeffRef(index);}
+
+
+ inline AlignedVector3()
+ {}
+
+ inline AlignedVector3(const Scalar& x, const Scalar& y, const Scalar& z)
+ : m_coeffs(x, y, z, Scalar(0))
+ {}
+
+ inline AlignedVector3(const AlignedVector3& other)
+ : Base(), m_coeffs(other.m_coeffs)
+ {}
+
+    template<typename XprType, int Size=XprType::SizeAtCompileTime>
+    struct generic_assign_selector {};
+
+    template<typename XprType> struct generic_assign_selector<XprType,4>
+ {
+ inline static void run(AlignedVector3& dest, const XprType& src)
+ {
+ dest.m_coeffs = src;
+ }
+ };
+
+    template<typename XprType> struct generic_assign_selector<XprType,3>
+ {
+ inline static void run(AlignedVector3& dest, const XprType& src)
+ {
+ dest.m_coeffs.template head<3>() = src;
+ dest.m_coeffs.w() = Scalar(0);
+ }
+ };
+
+    template<typename Derived>
+    inline AlignedVector3(const MatrixBase<Derived>& other)
+    {
+      generic_assign_selector<Derived>::run(*this,other.derived());
+ }
+
+ inline AlignedVector3& operator=(const AlignedVector3& other)
+ { m_coeffs = other.m_coeffs; return *this; }
+
+    template<typename Derived>
+    inline AlignedVector3& operator=(const MatrixBase<Derived>& other)
+    {
+      generic_assign_selector<Derived>::run(*this,other.derived());
+ return *this;
+ }
+
+ inline AlignedVector3 operator+(const AlignedVector3& other) const
+ { return AlignedVector3(m_coeffs + other.m_coeffs); }
+
+ inline AlignedVector3& operator+=(const AlignedVector3& other)
+ { m_coeffs += other.m_coeffs; return *this; }
+
+ inline AlignedVector3 operator-(const AlignedVector3& other) const
+ { return AlignedVector3(m_coeffs - other.m_coeffs); }
+
+ inline AlignedVector3 operator-() const
+ { return AlignedVector3(-m_coeffs); }
+
+ inline AlignedVector3 operator-=(const AlignedVector3& other)
+ { m_coeffs -= other.m_coeffs; return *this; }
+
+ inline AlignedVector3 operator*(const Scalar& s) const
+ { return AlignedVector3(m_coeffs * s); }
+
+ inline friend AlignedVector3 operator*(const Scalar& s,const AlignedVector3& vec)
+ { return AlignedVector3(s * vec.m_coeffs); }
+
+ inline AlignedVector3& operator*=(const Scalar& s)
+ { m_coeffs *= s; return *this; }
+
+ inline AlignedVector3 operator/(const Scalar& s) const
+ { return AlignedVector3(m_coeffs / s); }
+
+ inline AlignedVector3& operator/=(const Scalar& s)
+ { m_coeffs /= s; return *this; }
+
+ inline Scalar dot(const AlignedVector3& other) const
+ {
+ eigen_assert(m_coeffs.w()==Scalar(0));
+ eigen_assert(other.m_coeffs.w()==Scalar(0));
+ return m_coeffs.dot(other.m_coeffs);
+ }
+
+ inline void normalize()
+ {
+ m_coeffs /= norm();
+ }
+
+ inline AlignedVector3 normalized() const
+ {
+ return AlignedVector3(m_coeffs / norm());
+ }
+
+ inline Scalar sum() const
+ {
+ eigen_assert(m_coeffs.w()==Scalar(0));
+ return m_coeffs.sum();
+ }
+
+ inline Scalar squaredNorm() const
+ {
+ eigen_assert(m_coeffs.w()==Scalar(0));
+ return m_coeffs.squaredNorm();
+ }
+
+ inline Scalar norm() const
+ {
+ using std::sqrt;
+ return sqrt(squaredNorm());
+ }
+
+ inline AlignedVector3 cross(const AlignedVector3& other) const
+ {
+ return AlignedVector3(m_coeffs.cross3(other.m_coeffs));
+ }
+
+    template<typename Derived>
+    inline bool isApprox(const MatrixBase<Derived>& other, const RealScalar& eps=NumTraits<Scalar>::dummy_precision()) const
+ {
+ return m_coeffs.template head<3>().isApprox(other,eps);
+ }
+
+ CoeffType& coeffs() { return m_coeffs; }
+ const CoeffType& coeffs() const { return m_coeffs; }
+};
+
+namespace internal {
+
+template<typename _Scalar>
+struct eval<AlignedVector3<_Scalar>, Dense>
+{
+ typedef const AlignedVector3<_Scalar>& type;
+};
+
+template<typename _Scalar>
+struct evaluator<AlignedVector3<_Scalar> >
+  : evaluator<Matrix<_Scalar,4,1> >
+{
+  typedef AlignedVector3<_Scalar> XprType;
+  typedef evaluator<Matrix<_Scalar,4,1> > Base;
+
+ evaluator(const XprType &m) : Base(m.coeffs()) {}
+};
+
+}
+
+//@}
+
+}
+
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_ALIGNED_VECTOR3
diff --git a/external/unsupported/Eigen/ArpackSupport b/external/unsupported/Eigen/ArpackSupport
new file mode 100644
index 0000000..67c4ac8
--- /dev/null
+++ b/external/unsupported/Eigen/ArpackSupport
@@ -0,0 +1,30 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARPACKSUPPORT_MODULE_H
+#define EIGEN_ARPACKSUPPORT_MODULE_H
+
+#include "../../Eigen/Core"
+
+/** \defgroup ArpackSupport_Module Arpack support module
+ *
+ * This module provides a wrapper to Arpack, a library for sparse eigenvalue decomposition.
+ *
+ * \code
+ * #include <unsupported/Eigen/ArpackSupport>
+ * \endcode
+ */
+
+#include "../../Eigen/SparseCholesky"
+
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+#include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h"
+
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_ARPACKSUPPORT_MODULE_H
diff --git a/external/unsupported/Eigen/AutoDiff b/external/unsupported/Eigen/AutoDiff
new file mode 100644
index 0000000..7a4ff46
--- /dev/null
+++ b/external/unsupported/Eigen/AutoDiff
@@ -0,0 +1,46 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_AUTODIFF_MODULE
+#define EIGEN_AUTODIFF_MODULE
+
+namespace Eigen {
+
+/**
+ * \defgroup AutoDiff_Module Auto Diff module
+ *
+ * This module features forward automatic differentiation via a simple
+ * templated scalar type wrapper AutoDiffScalar.
+ *
+ * Warning : this should NOT be confused with numerical differentiation, which
+ * is a different method and has its own module in Eigen : \ref NumericalDiff_Module.
+ *
+ * \code
+ * #include <unsupported/Eigen/AutoDiff>
+ * \endcode
+ */
+//@{
+
+}
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+
+#include "src/AutoDiff/AutoDiffScalar.h"
+// #include "src/AutoDiff/AutoDiffVector.h"
+#include "src/AutoDiff/AutoDiffJacobian.h"
+
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+
+
+namespace Eigen {
+//@}
+}
+
+#endif // EIGEN_AUTODIFF_MODULE
diff --git a/external/unsupported/Eigen/BVH b/external/unsupported/Eigen/BVH
new file mode 100644
index 0000000..666c983
--- /dev/null
+++ b/external/unsupported/Eigen/BVH
@@ -0,0 +1,95 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Ilya Baran
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BVH_MODULE_H
+#define EIGEN_BVH_MODULE_H
+
+#include "../../Eigen/Core"
+#include "../../Eigen/Geometry"
+#include "../../Eigen/StdVector"
+#include
+#include
+
+namespace Eigen {
+
+/**
+ * \defgroup BVH_Module BVH module
+ * \brief This module provides generic bounding volume hierarchy algorithms
+ * and reference tree implementations.
+ *
+ *
+ * \code
+ * #include <unsupported/Eigen/BVH>
+ * \endcode
+ *
+ * A bounding volume hierarchy (BVH) can accelerate many geometric queries. This module provides a generic implementation
+ * of the two basic algorithms over a BVH: intersection of a query object against all objects in the hierarchy and minimization
+ * of a function over the objects in the hierarchy. It also provides intersection and minimization over a cartesian product of
+ * two BVH's. A BVH accelerates intersection by using the fact that if a query object does not intersect a volume, then it cannot
+ * intersect any object contained in that volume. Similarly, a BVH accelerates minimization because the minimum of a function
+ * over a volume is no greater than the minimum of a function over any object contained in it.
+ *
+ * Some sample queries that can be written in terms of intersection are:
+ * - Determine all points where a ray intersects a triangle mesh
+ * - Given a set of points, determine which are contained in a query sphere
+ * - Given a set of spheres, determine which contain the query point
+ * - Given a set of disks, determine if any is completely contained in a query rectangle (represent each 2D disk as a point \f$(x,y,r)\f$
+ * in 3D and represent the rectangle as a pyramid based on the original rectangle and shrinking in the \f$r\f$ direction)
+ * - Given a set of points, count how many pairs are \f$d\pm\epsilon\f$ apart (done by looking at the cartesian product of the set
+ * of points with itself)
+ *
+ * Some sample queries that can be written in terms of function minimization over a set of objects are:
+ * - Find the intersection between a ray and a triangle mesh closest to the ray origin (function is infinite off the ray)
+ * - Given a polyline and a query point, determine the closest point on the polyline to the query
+ * - Find the diameter of a point cloud (done by looking at the cartesian product and using negative distance as the function)
+ * - Determine how far two meshes are from colliding (this is also a cartesian product query)
+ *
+ * This implementation decouples the basic algorithms both from the type of hierarchy (and the types of the bounding volumes) and
+ * from the particulars of the query. To enable abstraction from the BVH, the BVH is required to implement a generic mechanism
+ * for traversal. To abstract from the query, the query is responsible for keeping track of results.
+ *
+ * To be used in the algorithms, a hierarchy must implement the following traversal mechanism (see KdBVH for a sample implementation): \code
+ typedef Volume //the type of bounding volume
+ typedef Object //the type of object in the hierarchy
+ typedef Index //a reference to a node in the hierarchy--typically an int or a pointer
+ typedef VolumeIterator //an iterator type over node children--returns Index
+ typedef ObjectIterator //an iterator over object (leaf) children--returns const Object &
+ Index getRootIndex() const //returns the index of the hierarchy root
+ const Volume &getVolume(Index index) const //returns the bounding volume of the node at given index
+ void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd,
+ ObjectIterator &outOBegin, ObjectIterator &outOEnd) const
+ //getChildren takes a node index and makes [outVBegin, outVEnd) range over its node children
+ //and [outOBegin, outOEnd) range over its object children
+ \endcode
+ *
+ * To use the hierarchy, call BVIntersect or BVMinimize, passing it a BVH (or two, for cartesian product) and a minimizer or intersector.
+ * For an intersection query on a single BVH, the intersector encapsulates the query and must provide two functions:
+ * \code
+ bool intersectVolume(const Volume &volume) //returns true if the query intersects the volume
+ bool intersectObject(const Object &object) //returns true if the intersection search should terminate immediately
+ \endcode
+ * The guarantee that BVIntersect provides is that intersectObject will be called on every object whose bounding volume
+ * intersects the query (but possibly on other objects too) unless the search is terminated prematurely. It is the
+ * responsibility of the intersectObject function to keep track of the results in whatever manner is appropriate.
+ * The cartesian product intersection and the BVMinimize queries are similar--see their individual documentation.
+ *
+ * The following is a simple but complete example for how to use the BVH to accelerate the search for a closest red-blue point pair:
+ * \include BVH_Example.cpp
+ * Output: \verbinclude BVH_Example.out
+ */
+}
+
+//@{
+
+#include "src/BVH/BVAlgorithms.h"
+#include "src/BVH/KdBVH.h"
+
+//@}
+
+#endif // EIGEN_BVH_MODULE_H
diff --git a/external/unsupported/Eigen/CMakeLists.txt b/external/unsupported/Eigen/CMakeLists.txt
new file mode 100644
index 0000000..631a060
--- /dev/null
+++ b/external/unsupported/Eigen/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(Eigen_HEADERS
+ AdolcForward
+ AlignedVector3
+ ArpackSupport
+ AutoDiff
+ BVH
+ EulerAngles
+ FFT
+ IterativeSolvers
+ KroneckerProduct
+ LevenbergMarquardt
+ MatrixFunctions
+ MoreVectorization
+ MPRealSupport
+ NonLinearOptimization
+ NumericalDiff
+ OpenGLSupport
+ Polynomials
+ Skyline
+ SparseExtra
+ SpecialFunctions
+ Splines
+ )
+
+install(FILES
+ ${Eigen_HEADERS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel
+ )
+
+install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")
+
+add_subdirectory(CXX11)
diff --git a/external/unsupported/Eigen/CXX11/CMakeLists.txt b/external/unsupported/Eigen/CXX11/CMakeLists.txt
new file mode 100644
index 0000000..385ed24
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(Eigen_CXX11_HEADERS Tensor TensorSymmetry ThreadPool)
+
+install(FILES
+ ${Eigen_CXX11_HEADERS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel
+ )
+
+install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h")
diff --git a/external/unsupported/Eigen/CXX11/Tensor b/external/unsupported/Eigen/CXX11/Tensor
new file mode 100644
index 0000000..0938bb5
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/Tensor
@@ -0,0 +1,137 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner
+// Copyright (C) 2013 Christian Seiler
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//#ifndef EIGEN_CXX11_TENSOR_MODULE
+//#define EIGEN_CXX11_TENSOR_MODULE
+
+#include "../../../Eigen/Core"
+
+#if EIGEN_HAS_CXX11
+
+#include "../SpecialFunctions"
+
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
+#include "src/util/CXX11Meta.h"
+#include "src/util/MaxSizeVector.h"
+
+/** \defgroup CXX11_Tensor_Module Tensor Module
+ *
+ * This module provides a Tensor class for storing arbitrarily indexed
+ * objects.
+ *
+ * \code
+ * #include <unsupported/Eigen/CXX11/Tensor>
+ * \endcode
+ *
+ * Much of the documentation can be found \ref eigen_tensors "here".
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if defined(EIGEN_USE_THREADS) || defined(EIGEN_USE_SYCL)
+#include "ThreadPool"
+#endif
+
+#ifdef EIGEN_USE_GPU
+  #include <iostream>
+  #if defined(EIGEN_USE_HIP)
+    #include <hip/hip_runtime.h>
+  #else
+    #include <cuda_runtime.h>
+ #endif
+#endif
+
+#include "src/Tensor/TensorMacros.h"
+#include "src/Tensor/TensorForwardDeclarations.h"
+#include "src/Tensor/TensorMeta.h"
+#include "src/Tensor/TensorFunctors.h"
+#include "src/Tensor/TensorCostModel.h"
+#include "src/Tensor/TensorDeviceDefault.h"
+#include "src/Tensor/TensorDeviceThreadPool.h"
+#include "src/Tensor/TensorDeviceGpu.h"
+#ifndef gpu_assert
+#define gpu_assert(x)
+#endif
+#include "src/Tensor/TensorDeviceSycl.h"
+#include "src/Tensor/TensorIndexList.h"
+#include "src/Tensor/TensorDimensionList.h"
+#include "src/Tensor/TensorDimensions.h"
+#include "src/Tensor/TensorInitializer.h"
+#include "src/Tensor/TensorTraits.h"
+#include "src/Tensor/TensorRandom.h"
+#include "src/Tensor/TensorUInt128.h"
+#include "src/Tensor/TensorIntDiv.h"
+#include "src/Tensor/TensorGlobalFunctions.h"
+
+#include "src/Tensor/TensorBase.h"
+#include "src/Tensor/TensorBlock.h"
+
+#include "src/Tensor/TensorEvaluator.h"
+#include "src/Tensor/TensorExpr.h"
+#include "src/Tensor/TensorReduction.h"
+#include "src/Tensor/TensorReductionGpu.h"
+#include "src/Tensor/TensorArgMax.h"
+#include "src/Tensor/TensorConcatenation.h"
+#include "src/Tensor/TensorContractionMapper.h"
+#include "src/Tensor/TensorContractionBlocking.h"
+#include "src/Tensor/TensorContraction.h"
+#include "src/Tensor/TensorContractionThreadPool.h"
+#include "src/Tensor/TensorContractionGpu.h"
+#include "src/Tensor/TensorConversion.h"
+#include "src/Tensor/TensorConvolution.h"
+#include "src/Tensor/TensorFFT.h"
+#include "src/Tensor/TensorPatch.h"
+#include "src/Tensor/TensorImagePatch.h"
+#include "src/Tensor/TensorVolumePatch.h"
+#include "src/Tensor/TensorBroadcasting.h"
+#include "src/Tensor/TensorChipping.h"
+#include "src/Tensor/TensorInflation.h"
+#include "src/Tensor/TensorLayoutSwap.h"
+#include "src/Tensor/TensorMorphing.h"
+#include "src/Tensor/TensorPadding.h"
+#include "src/Tensor/TensorReverse.h"
+#include "src/Tensor/TensorShuffling.h"
+#include "src/Tensor/TensorStriding.h"
+#include "src/Tensor/TensorCustomOp.h"
+#include "src/Tensor/TensorEvalTo.h"
+#include "src/Tensor/TensorForcedEval.h"
+#include "src/Tensor/TensorGenerator.h"
+#include "src/Tensor/TensorAssign.h"
+#include "src/Tensor/TensorScan.h"
+#include "src/Tensor/TensorTrace.h"
+
+#ifdef EIGEN_USE_SYCL
+#include "src/Tensor/TensorReductionSycl.h"
+#include "src/Tensor/TensorConvolutionSycl.h"
+#include "src/Tensor/TensorContractionSycl.h"
+#include "src/Tensor/TensorScanSycl.h"
+#endif
+
+#include "src/Tensor/TensorExecutor.h"
+#include "src/Tensor/TensorDevice.h"
+
+#include "src/Tensor/TensorStorage.h"
+#include "src/Tensor/Tensor.h"
+#include "src/Tensor/TensorFixedSize.h"
+#include "src/Tensor/TensorMap.h"
+#include "src/Tensor/TensorRef.h"
+
+#include "src/Tensor/TensorIO.h"
+
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_HAS_CXX11
+//#endif // EIGEN_CXX11_TENSOR_MODULE
diff --git a/external/unsupported/Eigen/CXX11/TensorSymmetry b/external/unsupported/Eigen/CXX11/TensorSymmetry
new file mode 100644
index 0000000..b09c5e4
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/TensorSymmetry
@@ -0,0 +1,42 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Christian Seiler
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE
+#define EIGEN_CXX11_TENSORSYMMETRY_MODULE
+
+#include "Tensor"
+
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+#include "src/util/CXX11Meta.h"
+
+/** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module
+ *
+ * This module provides classes that allow for the definition of
+ * symmetries w.r.t. tensor indices.
+ *
+ * Including this module will implicitly include the Tensor module.
+ *
+ * \code
+ * #include <unsupported/Eigen/CXX11/TensorSymmetry>
+ * \endcode
+ */
+
+#include "src/TensorSymmetry/util/TemplateGroupTheory.h"
+#include "src/TensorSymmetry/Symmetry.h"
+#include "src/TensorSymmetry/StaticSymmetry.h"
+#include "src/TensorSymmetry/DynamicSymmetry.h"
+
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE
+
+/*
+ * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
+ */
diff --git a/external/unsupported/Eigen/CXX11/ThreadPool b/external/unsupported/Eigen/CXX11/ThreadPool
new file mode 100644
index 0000000..c5cafb2
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/ThreadPool
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_THREADPOOL_MODULE
+#define EIGEN_CXX11_THREADPOOL_MODULE
+
+#include "../../../Eigen/Core"
+
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module
+ *
+ * This module provides 2 threadpool implementations
+ * - a simple reference implementation
+ * - a faster non blocking implementation
+ *
+ * This module requires C++11.
+ *
+ * \code
+ * #include <unsupported/Eigen/CXX11/ThreadPool>
+ * \endcode
+ */
+
+
+// The code depends on CXX11, so only include the module if the
+// compiler supports it.
+#if (EIGEN_COMP_CXXVER >= 11)
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// There are non-parenthesized calls to "max" in the <unordered_map> header,
+// which trigger a check in test/main.h causing compilation to fail.
+// We work around the check here by removing the check for max in
+// the case where we have to emulate thread_local.
+#ifdef max
+#undef max
+#endif
+#include <unordered_map>
+
+#include "src/util/CXX11Meta.h"
+#include "src/util/MaxSizeVector.h"
+
+#include "src/ThreadPool/ThreadLocal.h"
+#include "src/ThreadPool/ThreadYield.h"
+#include "src/ThreadPool/ThreadCancel.h"
+#include "src/ThreadPool/EventCount.h"
+#include "src/ThreadPool/RunQueue.h"
+#include "src/ThreadPool/ThreadPoolInterface.h"
+#include "src/ThreadPool/ThreadEnvironment.h"
+#include "src/ThreadPool/Barrier.h"
+#include "src/ThreadPool/NonBlockingThreadPool.h"
+
+#endif
+
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_CXX11_THREADPOOL_MODULE
diff --git a/external/unsupported/Eigen/CXX11/src/Tensor/README.md b/external/unsupported/Eigen/CXX11/src/Tensor/README.md
new file mode 100644
index 0000000..2f65b1b
--- /dev/null
+++ b/external/unsupported/Eigen/CXX11/src/Tensor/README.md
@@ -0,0 +1,1815 @@
+# Eigen Tensors {#eigen_tensors}
+
+Tensors are multidimensional arrays of elements. Elements are typically scalars,
+but more complex types such as strings are also supported.
+
+## Tensor Classes
+
+You can manipulate a tensor with one of the following classes. They all are in
+the namespace `::Eigen.`
+
+
+### Class Tensor<data_type, rank>
+
+This is the class to use to create a tensor and allocate memory for it. The
+class is templatized with the tensor datatype, such as float or int, and the
+tensor rank. The rank is the number of dimensions, for example rank 2 is a
+matrix.
+
+Tensors of this class are resizable. For example, if you assign a tensor of a
+different size to a Tensor, that tensor is resized to match its new value.
+
+#### Constructor Tensor<data_type, rank>(size0, size1, ...)
+
+Constructor for a Tensor. The constructor must be passed `rank` integers
+indicating the sizes of the instance along each of the `rank`
+dimensions.
+
+    // Create a tensor of rank 3 of sizes 2, 3, 4.  This tensor owns
+    // memory to hold 24 floating point values (24 = 2 x 3 x 4).
+    Tensor<float, 3> t_3d(2, 3, 4);
+
+    // Resize t_3d by assigning a tensor of different sizes, but same rank.
+    t_3d = Tensor<float, 3>(3, 4, 3);
+
+#### Constructor Tensor<data_type, rank>(size_array)
+
+Constructor where the sizes for the constructor are specified as an array of
+values instead of an explicit list of parameters.  The array type to use is
+`Eigen::array<Eigen::Index>`.  The array can be constructed automatically
+from an initializer list.
+
+    // Create a tensor of strings of rank 2 with sizes 5, 7.
+    Tensor<string, 2> t_2d({5, 7});
+
+
+### Class TensorFixedSize<data_type, Sizes<size0, size1, ...>>
+
+Class to use for tensors of fixed size, where the size is known at compile
+time. Fixed sized tensors can provide very fast computations because all their
+dimensions are known by the compiler. FixedSize tensors are not resizable.
+
+If the total number of elements in a fixed size tensor is small enough the
+tensor data is held onto the stack and does not cause heap allocation and free.
+
+    // Create a 4 x 3 tensor of floats.
+    TensorFixedSize<float, Sizes<4, 3>> t_4x3;
+
+### Class TensorMap<Tensor<data_type, rank>>
+
+This is the class to use to create a tensor on top of memory allocated and
+owned by another part of your code.  It allows you to view any piece of allocated
+memory as a Tensor. Instances of this class do not own the memory where the
+data are stored.
+
+A TensorMap is not resizable because it does not own the memory where its data
+are stored.
+
+#### Constructor TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...)
+
+Constructor for a Tensor. The constructor must be passed a pointer to the
+storage for the data, and "rank" size attributes. The storage has to be
+large enough to hold all the data.
+
+    // Map a tensor of ints on top of stack-allocated storage.
+    int storage[128];  // 2 x 4 x 2 x 8 = 128
+    TensorMap<Tensor<int, 4>> t_4d(storage, 2, 4, 2, 8);
+
+    // The same storage can be viewed as a different tensor.
+    // You can also pass the sizes as an array.
+    TensorMap<Tensor<int, 2>> t_2d(storage, 16, 8);
+
+    // You can also map fixed-size tensors.  Here we get a 1d view of
+    // the 2d fixed-size tensor.
+    TensorFixedSize<float, Sizes<4, 3>> t_4x3;
+    TensorMap<Tensor<float, 1>> t_12(t_4x3.data(), 12);
+
+
+#### Class TensorRef
+
+See Assigning to a TensorRef below.
+
+## Accessing Tensor Elements
+
+#### tensor(index0, index1...)
+
+Return the element at position `(index0, index1...)` in tensor
+`tensor`. You must pass as many parameters as the rank of `tensor`.
+The expression can be used as an l-value to set the value of the element at the
+specified position. The value returned is of the datatype of the tensor.
+
+ // Set the value of the element at position (0, 1, 0);
+    Tensor<float, 3> t_3d(2, 3, 4);
+ t_3d(0, 1, 0) = 12.0f;
+
+ // Initialize all elements to random values.
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 4; ++k) {
+ t_3d(i, j, k) = ...some random value...;
+ }
+ }
+ }
+
+ // Print elements of a tensor.
+ for (int i = 0; i < 2; ++i) {
+ LOG(INFO) << t_3d(i, 0, 0);
+ }
+
+
+## TensorLayout
+
+The tensor library supports 2 layouts: `ColMajor` (the default) and
+`RowMajor`. Only the default column major layout is currently fully
+supported, and it is therefore not recommended to attempt to use the row major
+layout at the moment.
+
+The layout of a tensor is optionally specified as part of its type. If not
+specified explicitly column major is assumed.
+
+    Tensor<float, 3, ColMajor> col_major;  // equivalent to Tensor<float, 3>
+    TensorMap<Tensor<float, 3, RowMajor> > row_major(data, ...);
+
+All the arguments to an expression must use the same layout. Attempting to mix
+different layouts will result in a compilation error.
+
+It is possible to change the layout of a tensor or an expression using the
+`swap_layout()` method. Note that this will also reverse the order of the
+dimensions.
+
+    Tensor<float, 2, ColMajor> col_major(2, 4);
+    Tensor<float, 2, RowMajor> row_major(2, 4);
+
+    Tensor<float, 2, ColMajor> col_major_result = col_major;  // ok, layouts match
+    Tensor<float, 2, ColMajor> col_major_result = row_major;  // will not compile
+
+ // Simple layout swap
+ col_major_result = row_major.swap_layout();
+ eigen_assert(col_major_result.dimension(0) == 4);
+ eigen_assert(col_major_result.dimension(1) == 2);
+
+ // Swap the layout and preserve the order of the dimensions
+    array<int, 2> shuffle(1, 0);
+ col_major_result = row_major.swap_layout().shuffle(shuffle);
+ eigen_assert(col_major_result.dimension(0) == 2);
+ eigen_assert(col_major_result.dimension(1) == 4);
+
+
+## Tensor Operations
+
+The Eigen Tensor library provides a vast library of operations on Tensors:
+numerical operations such as addition and multiplication, geometry operations
+such as slicing and shuffling, etc. These operations are available as methods
+of the Tensor classes, and in some cases as operator overloads. For example
+the following code computes the elementwise addition of two tensors:
+
+    Tensor<float, 3> t1(2, 3, 4);
+    ...set some values in t1...
+    Tensor<float, 3> t2(2, 3, 4);
+    ...set some values in t2...
+    // Set t3 to the element wise sum of t1 and t2
+    Tensor<float, 3> t3 = t1 + t2;
+
+While the code above looks easy enough, it is important to understand that the
+expression `t1 + t2` is not actually adding the values of the tensors. The
+expression instead constructs a "tensor operator" object of the class
+TensorCwiseBinaryOp, which has references to the tensors
+`t1` and `t2`. This is a small C++ object that knows how to add
+`t1` and `t2`. It is only when the value of the expression is assigned
+to the tensor `t3` that the addition is actually performed. Technically,
+this happens through the overloading of `operator=()` in the Tensor class.
+
+This mechanism for computing tensor expressions allows for lazy evaluation and
+optimizations which are what make the tensor library very fast.
+
+Of course, the tensor operators do nest, and the expression `t1 + t2 * 0.3f`
+is actually represented with the (approximate) tree of operators:
+
+    TensorCwiseBinaryOp<scalar_sum_op>(t1, TensorCwiseUnaryOp<scalar_mul_op>(t2, 0.3f))
+
+
+### Tensor Operations and C++ "auto"
+
+Because Tensor operations create tensor operators, the C++ `auto` keyword
+does not have its intuitive meaning. Consider these 2 lines of code:
+
+    Tensor<float, 3> t3 = t1 + t2;
+    auto t4 = t1 + t2;
+
+In the first line we allocate the tensor `t3` and it will contain the
+result of the addition of `t1` and `t2`. In the second line, `t4`
+is actually the tree of tensor operators that will compute the addition of
+`t1` and `t2`. In fact, `t4` is *not* a tensor and you cannot get
+the values of its elements:
+
+    Tensor<float, 3> t3 = t1 + t2;
+    cout << t3(0, 0, 0);  // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0)
+
+    auto t4 = t1 + t2;
+    cout << t4(0, 0, 0);  // Compilation error!
+
+When you use `auto` you do not get a Tensor as a result but instead a
+non-evaluated expression. So only use `auto` to delay evaluation.
+
+Unfortunately, there is no single underlying concrete type for holding
+non-evaluated expressions, hence you have to use auto in the case when you do
+want to hold non-evaluated expressions.
+
+When you need the results of a set of tensor computations you have to assign the
+result to a Tensor that will be capable of holding onto them. This can be
+either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing
+piece of memory. All the following will work:
+
+ auto t4 = t1 + t2;
+
+    Tensor<float, 3> result = t4;  // Could also be: result(t4);
+    cout << result(0, 0, 0);
+
+    TensorMap<float, 4> result(<a float* with enough space>, <size0>, ...) = t4;
+    cout << result(0, 0, 0);
+
+    TensorFixedSize<float, Sizes<size0, ...>> result = t4;
+    cout << result(0, 0, 0);
+
+Until you need the results, you can keep the operation around, and even reuse
+it for additional operations. As long as you keep the expression as an
+operation, no computation is performed.
+
+ // One way to compute exp((t1 + t2) * 0.2f);
+ auto t3 = t1 + t2;
+ auto t4 = t3 * 0.2f;
+ auto t5 = t4.exp();
+    Tensor<float, 3> result = t5;
+
+ // Another way, exactly as efficient as the previous one:
+    Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
+
+### Controlling When Expression are Evaluated
+
+There are several ways to control when expressions are evaluated:
+
+* Assignment to a Tensor, TensorFixedSize, or TensorMap.
+* Use of the eval() method.
+* Assignment to a TensorRef.
+
+#### Assigning to a Tensor, TensorFixedSize, or TensorMap.
+
+The most common way to evaluate an expression is to assign it to a Tensor. In
+the example below, the `auto` declarations make the intermediate values
+"Operations", not Tensors, and do not cause the expressions to be evaluated.
+The assignment to the Tensor `result` causes the evaluation of all the
+operations.
+
+ auto t3 = t1 + t2; // t3 is an Operation.
+ auto t4 = t3 * 0.2f; // t4 is an Operation.
+ auto t5 = t4.exp(); // t5 is an Operation.
+    Tensor<float, 3> result = t5;    // The operations are evaluated.
+
+If you know the ranks and sizes of the Operation value you can assign the
+Operation to a TensorFixedSize instead of a Tensor, which is a bit more
+efficient.
+
+ // We know that the result is a 4x4x2 tensor!
+    TensorFixedSize<float, Sizes<4, 4, 2>> result = t5;
+
+Similarly, assigning an expression to a TensorMap causes its evaluation.  Like
+tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to
+have the rank and sizes of the expression that are assigned to them.
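+
+A minimal sketch of this (the buffer name and sizes below are illustrative, and
+assume `t5` evaluates to a 4 x 4 x 2 float tensor):
+
+    float buffer[32];                                    // pre-allocated storage, 4 * 4 * 2 = 32 floats
+    TensorMap<Tensor<float, 3>> mapped(buffer, 4, 4, 2);
+    mapped = t5;                                         // assigning to the TensorMap evaluates t5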
+
+#### Calling eval().
+
+When you compute large composite expressions, you sometimes want to tell Eigen
+that an intermediate value in the expression tree is worth evaluating ahead of
+time. This is done by inserting a call to the `eval()` method of the
+expression Operation.
+
+ // The previous example could have been written:
+    Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
+
+    // If you want to compute (t1 + t2) once ahead of time you can write:
+    Tensor<float, 3> result = ((t1 + t2).eval() * 0.2f).exp();
+
+Semantically, calling `eval()` is equivalent to materializing the value of
+the expression in a temporary Tensor of the right size. The code above in
+effect does:
+
+ // .eval() knows the size!
+    TensorFixedSize<float, Sizes<4, 4, 2>> tmp = t1 + t2;
+    Tensor<float, 3> result = (tmp * 0.2f).exp();
+
+Note that the return value of `eval()` is itself an Operation, so the
+following code does not do what you may think:
+
+ // Here t3 is an evaluation Operation. t3 has not been evaluated yet.
+ auto t3 = (t1 + t2).eval();
+
+ // You can use t3 in another expression. Still no evaluation.
+ auto t4 = (t3 * 0.2f).exp();
+
+ // The value is evaluated when you assign the Operation to a Tensor, using
+    // an intermediate tensor to represent t3.
+    Tensor<float, 3> result = t4;
+
+While in the examples above calling `eval()` does not make a difference in
+performance, in other cases it can make a huge difference. In the expression
+below the `broadcast()` expression causes the `X.maximum()` expression
+to be evaluated many times:
+
+ Tensor<...> X ...;
+ Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast))
+ * beta).exp();
+
+Inserting a call to `eval()` between the `maximum()` and
+`reshape()` calls guarantees that maximum() is only computed once and
+greatly speeds-up execution:
+
+ Tensor<...> Y =
+ ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast))
+ * beta).exp();
+
+In the other example below, the tensor `Y` is both used in the expression
+and its assignment. This is an aliasing problem and if the evaluation is not
+done in the right order Y will be updated incrementally during the evaluation
+resulting in bogus results:
+
+ Tensor<...> Y ...;
+ Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast));
+
+Inserting a call to `eval()` between the `sum()` and `reshape()`
+expressions ensures that the sum is computed before any updates to `Y` are
+done.
+
+ Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
+
+Note that an eval around the full right hand side expression is not needed
+because the generated code has to compute the i-th value of the right hand side
+before assigning it to the left hand side.
+
+However, if you were assigning the expression value to a shuffle of `Y`
+then you would need to force an eval for correctness by adding an `eval()`
+call for the right hand side:
+
+ Y.shuffle(...) =
+ (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval();
+
+
+#### Assigning to a TensorRef.
+
+If you need to access only a few elements from the value of an expression you
+can avoid materializing the value in a full tensor by using a TensorRef.
+
+A TensorRef is a small wrapper class for any Eigen Operation. It provides
+overloads for the `()` operator that let you access individual values in
+the expression. TensorRef is convenient, because the Operation themselves do
+not provide a way to access individual elements.
+
+ // Create a TensorRef for the expression. The expression is not
+ // evaluated yet.
+    TensorRef<Tensor<float, 3> > ref = ((t1 + t2) * 0.2f).exp();
+
+ // Use "ref" to access individual elements. The expression is evaluated
+ // on the fly.
+ float at_0 = ref(0, 0, 0);
+ cout << ref(0, 1, 0);
+
+Only use TensorRef when you need a subset of the values of the expression.
+TensorRef only computes the values you access. However note that if you are
+going to access all the values it will be much faster to materialize the
+results in a Tensor first.
+
+In some cases, if the full Tensor result would be very large, you may save
+memory by accessing it as a TensorRef. But not always. So don't count on it.
+
+
+### Controlling How Expressions Are Evaluated
+
+The tensor library provides several implementations of the various operations
+such as contractions and convolutions. The implementations are optimized for
+different environments: single threaded on CPU, multi threaded on CPU, or on a
+GPU using cuda. Additional implementations may be added later.
+
+You can choose which implementation to use with the `device()` call. If
+you do not choose an implementation explicitly the default implementation that
+uses a single thread on the CPU is used.
+
+The default implementation has been optimized for recent Intel CPUs, taking
+advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the
+library on ARM CPUs. Note that you need to pass compiler-dependent flags
+to enable the use of SSE, AVX, and other instructions.
+
+For example, the following code adds two tensors using the default
+single-threaded CPU implementation:
+
+    Tensor<float, 2> a(30, 40);
+    Tensor<float, 2> b(30, 40);
+    Tensor<float, 2> c = a + b;
+
+To choose a different implementation you have to insert a `device()` call
+before the assignment of the result. For technical C++ reasons this requires
+that the Tensor for the result be declared on its own. This means that you
+have to know the size of the result.
+
+    Eigen::Tensor<float, 2> c(30, 40);
+ c.device(...) = a + b;
+
+The call to `device()` must be the last call on the left of the operator=.
+
+You must pass to the `device()` call an Eigen device object. There are
+presently three devices you can use: DefaultDevice, ThreadPoolDevice and
+GpuDevice.
+
+
+#### Evaluating With the DefaultDevice
+
+This is exactly the same as not inserting a `device()` call.
+
+ DefaultDevice my_device;
+ c.device(my_device) = a + b;
+
+#### Evaluating with a Thread Pool
+
+ // Create the Eigen ThreadPool
+    Eigen::ThreadPool pool(8 /* number of threads in pool */);
+
+ // Create the Eigen ThreadPoolDevice.
+ Eigen::ThreadPoolDevice my_device(&pool, 4 /* number of threads to use */);
+
+ // Now just use the device when evaluating expressions.
+    Eigen::Tensor<float, 2> c(30, 50);
+ c.device(my_device) = a.contract(b, dot_product_dims);
+
+
+#### Evaluating On GPU
+
+This is presently a bit more complicated than just using a thread pool device.
+You need to create a GPU device but you also need to explicitly allocate the
+memory for tensors with cuda.
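+
+A rough sketch of the workflow (assuming CUDA, `EIGEN_USE_GPU` defined, and the
+`GpuStreamDevice`/`GpuDevice` types from this module; names and sizes are
+illustrative):
+
+    // Allocate device memory for the operands and the result with CUDA.
+    std::size_t bytes = 30 * 40 * sizeof(float);
+    float *d_a, *d_b, *d_c;
+    cudaMalloc(&d_a, bytes);
+    cudaMalloc(&d_b, bytes);
+    cudaMalloc(&d_c, bytes);
+
+    // Wrap the raw device pointers in TensorMaps.
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> a(d_a, 30, 40);
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> b(d_b, 30, 40);
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> c(d_c, 30, 40);
+
+    // Create the GPU device and evaluate the expression on it.
+    Eigen::GpuStreamDevice stream;
+    Eigen::GpuDevice gpu_device(&stream);
+    c.device(gpu_device) = a + b;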
+
+
+## API Reference
+
+### Datatypes
+
+In the documentation of the tensor methods and Operation we mention datatypes
+that are tensor-type specific:
+
+#### <Tensor-Type>::Dimensions
+
+Acts like an array of ints. Has an `int size` attribute, and can be
+indexed like an array to access individual values. Used to represent the
+dimensions of a tensor. See `dimensions()`.
+
+#### <Tensor-Type>::Index
+
+Acts like an `int`. Used for indexing tensors along their dimensions. See
+`operator()`, `dimension()`, and `size()`.
+
+#### <Tensor-Type>::Scalar
+
+Represents the datatype of individual tensor elements. For example, for a
+`Tensor<float, 3>`, `Scalar` is the type `float`.  See
+`setConstant()`.
+
+#### <Operation>
+
+We use this pseudo type to indicate that a tensor Operation is returned by a
+method. We indicate in the text the type and dimensions of the tensor that the
+Operation returns after evaluation.
+
+The Operation will have to be evaluated, for example by assigning it to a
+tensor, before you can access the values of the resulting tensor. You can also
+access the values through a TensorRef.
+
+
+## Built-in Tensor Methods
+
+These are usual C++ methods that act on tensors immediately. They are not
+Operations which provide delayed evaluation of their results. Unless specified
+otherwise, all the methods listed below are available on all tensor classes:
+Tensor, TensorFixedSize, and TensorMap.
+
+## Metadata
+
+### int NumDimensions
+
+Constant value indicating the number of dimensions of a Tensor. This is also
+known as the tensor "rank".
+
+    Eigen::Tensor<float, 2> a(3, 4);
+ cout << "Dims " << a.NumDimensions;
+ => Dims 2
+
+### Dimensions dimensions()
+
+Returns an array-like object representing the dimensions of the tensor.
+The actual type of the `dimensions()` result is `<Tensor-Type>::Dimensions`.
+
+    Eigen::Tensor<float, 2> a(3, 4);
+    const Eigen::Tensor<float, 2>::Dimensions& d = a.dimensions();
+ cout << "Dim size: " << d.size << ", dim 0: " << d[0]
+ << ", dim 1: " << d[1];
+ => Dim size: 2, dim 0: 3, dim 1: 4
+
+If you use a C++11 compiler, you can use `auto` to simplify the code:
+
+ const auto& d = a.dimensions();
+ cout << "Dim size: " << d.size << ", dim 0: " << d[0]
+ << ", dim 1: " << d[1];
+ => Dim size: 2, dim 0: 3, dim 1: 4
+
+### Index dimension(Index n)
+
+Returns the n-th dimension of the tensor. The actual type of the
+`dimension()` result is `<Tensor-Type>::Index`, but you can
+always use it like an int.
+
+    Eigen::Tensor<float, 2> a(3, 4);
+ int dim1 = a.dimension(1);
+ cout << "Dim 1: " << dim1;
+ => Dim 1: 4
+
+### Index size()
+
+Returns the total number of elements in the tensor. This is the product of all
+the tensor dimensions. The actual type of the `size()` result is
+`<Tensor-Type>::Index`, but you can always use it like an int.
+
+    Eigen::Tensor<float, 2> a(3, 4);
+ cout << "Size: " << a.size();
+ => Size: 12
+
+
+### Getting Dimensions From An Operation
+
+A few operations provide `dimensions()` directly,
+e.g. `TensorReslicingOp`. Most operations defer calculating dimensions
+until the operation is being evaluated. If you need access to the dimensions
+of a deferred operation, you can wrap it in a TensorRef (see Assigning to a
+TensorRef above), which provides `dimensions()` and `dimension()` as
+above.
+
+TensorRef can also wrap the plain Tensor types, so this is a useful idiom in
+templated contexts where the underlying object could be either a raw Tensor
+or some deferred operation (e.g. a slice of a Tensor). In this case, the
+template code can wrap the object in a TensorRef and reason about its
+dimensionality while remaining agnostic to the underlying type.
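+
+For example, a small sketch of the idiom (names are illustrative):
+
+    Eigen::Tensor<float, 2> a(3, 4);
+    a.setRandom();
+    // "a * 2.0f" is a deferred Operation; wrap it in a TensorRef to query
+    // its dimensions without materializing the whole result.
+    Eigen::TensorRef<Eigen::Tensor<float, 2>> ref = a * 2.0f;
+    cout << "dim 0: " << ref.dimension(0) << ", dim 1: " << ref.dimension(1);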
+
+
+## Constructors
+
+### Tensor
+
+Creates a tensor of the specified size. The number of arguments must be equal
+to the rank of the tensor. The content of the tensor is not initialized.
+
+    Eigen::Tensor<float, 2> a(3, 4);
+ cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
+ => NumRows: 3 NumCols: 4
+
+### TensorFixedSize
+
+Creates a tensor of the specified size. The number of arguments in the Sizes<>
+template parameter determines the rank of the tensor. The content of the tensor
+is not initialized.
+
+    Eigen::TensorFixedSize<float, Sizes<3, 4>> a;
+ cout << "Rank: " << a.rank() << endl;
+ => Rank: 2
+ cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
+ => NumRows: 3 NumCols: 4
+
+### TensorMap
+
+Creates a tensor mapping an existing array of data. The data must not be freed
+until the TensorMap is discarded, and the size of the data must be large enough
+to accommodate the coefficients of the tensor.
+
+ float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    Eigen::TensorMap<Eigen::Tensor<float, 2>> a(data, 3, 4);
+ cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
+ => NumRows: 3 NumCols: 4
+ cout << "a(1, 2): " << a(1, 2) << endl;
+ => a(1, 2): 7
+
+
+## Contents Initialization
+
+When a new Tensor or a new TensorFixedSize are created, memory is allocated to
+hold all the tensor elements, but the memory is not initialized. Similarly,
+when a new TensorMap is created on top of non-initialized memory, its
+contents are not initialized.
+
+You can use one of the methods below to initialize the tensor memory. These
+have an immediate effect on the tensor and return the tensor itself as a
+result. These are not tensor Operations which delay evaluation.
+
+### setConstant(const Scalar& val)
+
+Sets all elements of the tensor to the constant value `val`. `Scalar`
+is the type of data stored in the tensor. You can pass any value that is
+convertible to that type.
+
+Returns the tensor itself in case you want to chain another call.
+
+ a.setConstant(12.3f);
+ cout << "Constant: " << endl << a << endl << endl;
+ =>
+ Constant:
+ 12.3 12.3 12.3 12.3
+ 12.3 12.3 12.3 12.3
+ 12.3 12.3 12.3 12.3
+
+Note that `setConstant()` can be used on any tensor where the element type
+has a copy constructor and an `operator=()`:
+
+    Eigen::Tensor<string, 2> a(2, 3);
+ a.setConstant("yolo");
+ cout << "String tensor: " << endl << a << endl << endl;
+ =>
+ String tensor:
+ yolo yolo yolo
+ yolo yolo yolo
+
+
+### setZero()
+
+Fills the tensor with zeros. Equivalent to `setConstant(Scalar(0))`.
+Returns the tensor itself in case you want to chain another call.
+
+ a.setZero();
+ cout << "Zeros: " << endl << a << endl << endl;
+ =>
+ Zeros:
+ 0 0 0 0
+ 0 0 0 0
+ 0 0 0 0
+
+
+### setValues({..initializer_list})
+
+Fills the tensor with explicit values specified in a std::initializer_list.
+The type of the initializer list depends on the type and rank of the tensor.
+
+If the tensor has rank N, the initializer list must be nested N times. The
+most deeply nested lists must contain P scalars of the Tensor type where P is
+the size of the last dimension of the Tensor.
+
+For example, for a `TensorFixedSize<float, Sizes<2, 3>>` the initializer list
+must contain 2 lists of 3 floats each.
+
+`setValues()` returns the tensor itself in case you want to chain another
+call.
+
+    Eigen::Tensor<float, 2> a(2, 3);
+ a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}});
+ cout << "a" << endl << a << endl << endl;
+ =>
+ a
+ 0 1 2
+ 3 4 5
+
+If a list is too short, the corresponding elements of the tensor will not be
+changed. This is valid at each level of nesting. For example the following
+code only sets the values of the first row of the tensor.
+
+    Eigen::Tensor<int, 2> a(2, 3);
+ a.setConstant(1000);
+ a.setValues({{10, 20, 30}});
+ cout << "a" << endl << a << endl << endl;
+ =>
+ a
+ 10 20 30
+ 1000 1000 1000
+
+### setRandom()
+
+Fills the tensor with random values. Returns the tensor itself in case you
+want to chain another call.
+
+ a.setRandom();
+ cout << "Random: " << endl << a << endl << endl;
+ =>
+ Random:
+ 0.680375 0.59688 -0.329554 0.10794
+ -0.211234 0.823295 0.536459 -0.0452059
+ 0.566198 -0.604897 -0.444451 0.257742
+
+You can customize `setRandom()` by providing your own random number
+generator as a template argument:
+
+    a.setRandom<MyRandomGenerator>();
+
+Here, `MyRandomGenerator` must be a struct with the following member
+functions, where Scalar and Index are the same as `<Tensor-Type>::Scalar`
+and `<Tensor-Type>::Index`.
+
+See `struct UniformRandomGenerator` in TensorFunctors.h for an example.
+
+ // Custom number generator for use with setRandom().
+ struct MyRandomGenerator {
+ // Default and copy constructors. Both are needed
+ MyRandomGenerator() { }
+ MyRandomGenerator(const MyRandomGenerator& ) { }
+
+ // Return a random value to be used. "element_location" is the
+ // location of the entry to set in the tensor, it can typically
+ // be ignored.
+ Scalar operator()(Eigen::DenseIndex element_location,
+ Eigen::DenseIndex /*unused*/ = 0) const {
+      return <randomly generated value of type Scalar>;
+ }
+
+ // Same as above but generates several numbers at a time.
+    typename internal::packet_traits<Scalar>::type packetOp(
+        Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
+      return <a packet of randomly generated values>;
+ }
+ };
+
+You can also use one of the 2 random number generators that are part of the
+tensor library:
+* UniformRandomGenerator
+* NormalRandomGenerator
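+
+For instance, a small sketch (assuming the built-in generators live in
+`Eigen::internal`, as in TensorFunctors.h):
+
+    // Fill a tensor with normally distributed values using a built-in generator.
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setRandom<Eigen::internal::NormalRandomGenerator<float>>();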
+
+
+## Data Access
+
+The Tensor, TensorFixedSize, and TensorRef classes provide the following
+accessors to access the tensor coefficients:
+
+    const Scalar& operator()(const array<Index, NumIndices>& indices)
+    const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
+    Scalar& operator()(const array<Index, NumIndices>& indices)
+    Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
+
+The number of indices must be equal to the rank of the tensor. Moreover, these
+accessors are not available on tensor expressions. In order to access the
+values of a tensor expression, the expression must either be evaluated or
+wrapped in a TensorRef.
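+
+For example, a short sketch of element access on a concrete tensor:
+
+    Eigen::Tensor<float, 3> t(2, 3, 4);
+    t.setZero();
+    t(1, 2, 3) = 7.0f;            // write through the non-const accessor
+    float x = t(1, 2, 3);         // read access
+    cout << "t(1, 2, 3): " << x;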
+
+
+### Scalar* data() and const Scalar* data() const
+
+Returns a pointer to the storage for the tensor. The pointer is const if the
+tensor was const. This allows direct access to the data. The layout of the
+data depends on the tensor layout: RowMajor or ColMajor.
+
+This access is usually only needed for special cases, for example when mixing
+Eigen Tensor code with other libraries.
+
+Scalar is the type of data stored in the tensor.
+
+    Eigen::Tensor<float, 2> a(3, 4);
+ float* a_data = a.data();
+ a_data[0] = 123.45f;
+ cout << "a(0, 0): " << a(0, 0);
+ => a(0, 0): 123.45
+
+
+## Tensor Operations
+
+All the methods documented below return non evaluated tensor `Operations`.
+These can be chained: you can apply another Tensor Operation to the value
+returned by the method.
+
+The chain of Operations is evaluated lazily, typically when it is assigned to a
+tensor. See "Controlling when Expression are Evaluated" for more details about
+their evaluation.
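+
+A minimal sketch of this behaviour (assuming two small float tensors): no
+computation happens while the expression is being built; the whole chain is
+evaluated when it is assigned to a concrete tensor.
+
+    Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
+    a.setRandom();
+    b.setRandom();
+    // Building the expression does not compute anything yet.
+    auto expr = (a + b).sqrt();
+    // The chain is evaluated here, when assigned to a Tensor.
+    Eigen::Tensor<float, 2> c = expr;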
+
+### constant(const Scalar& val)
+
+Returns a tensor of the same type and dimensions as the original tensor but
+where all elements have the value `val`.
+
+This is useful, for example, when you want to add or subtract a constant from a
+tensor, or multiply every element of a tensor by a scalar.
+
+ Eigen::Tensor<float, 2> a(2, 3);
+ a.setConstant(1.0f);
+ Eigen::Tensor<float, 2> b = a + a.constant(2.0f);
+ Eigen::Tensor<float, 2> c = b * b.constant(0.2f);
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ cout << "c" << endl << c << endl << endl;
+ =>
+ a
+ 1 1 1
+ 1 1 1
+
+ b
+ 3 3 3
+ 3 3 3
+
+ c
+ 0.6 0.6 0.6
+ 0.6 0.6 0.6
+
+### random()
+
+Returns a tensor of the same type and dimensions as the current tensor
+but where all elements have random values.
+
+This is useful, for example, to add random values to an existing tensor.
+The generation of random values can be customized in the same manner
+as for `setRandom()`.
+
+ Eigen::Tensor<float, 2> a(2, 3);
+ a.setConstant(1.0f);
+ Eigen::Tensor<float, 2> b = a + a.random();
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ =>
+ a
+ 1 1 1
+ 1 1 1
+
+ b
+ 1.68038 1.5662 1.82329
+ 0.788766 1.59688 0.395103
+
+
+## Unary Element Wise Operations
+
+All these operations take a single input tensor as argument and return a tensor
+of the same type and dimensions as the tensor to which they are applied. The
+requested operations are applied to each element independently.
+
+### operator-()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the opposite values of the original tensor.
+
+ Eigen::Tensor<float, 2> a(2, 3);
+ a.setConstant(1.0f);
+ Eigen::Tensor<float, 2> b = -a;
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ =>
+ a
+ 1 1 1
+ 1 1 1
+
+ b
+ -1 -1 -1
+ -1 -1 -1
+
+### sqrt()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the square roots of the original tensor.
+
+### rsqrt()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the inverse square roots of the original tensor.
+
+### square()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the squares of the original tensor values.
+
+### inverse()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the inverses (coefficient-wise reciprocals) of the original tensor
+values.
+
+### exp()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the exponential of the original tensor.
+
+### log()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the natural logarithms of the original tensor.
+
+### abs()
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the absolute values of the original tensor.
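+
+A short sketch (assuming a small float tensor) chaining a few of these unary
+Operations:
+
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setValues({{1.0f, 4.0f, 9.0f}, {16.0f, 25.0f, 36.0f}});
+    // Take the square root of every coefficient, then its natural logarithm.
+    Eigen::Tensor<float, 2> b = a.sqrt().log();
+    // Coefficient-wise absolute value of the negated tensor.
+    Eigen::Tensor<float, 2> c = (-a).abs();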
+
+### pow(Scalar exponent)
+
+Returns a tensor of the same type and dimensions as the original tensor
+containing the coefficients of the original tensor to the power of the
+exponent.
+
+The type of the exponent, Scalar, is always the same as the type of the
+tensor coefficients. For example, only integer exponents can be used in
+conjunction with tensors of integer values.
+
+You can use cast() to lift this restriction. For example, this computes the
+cubic roots of an int Tensor:
+
+ Eigen::Tensor<int, 2> a(2, 3);
+ a.setValues({{0, 1, 8}, {27, 64, 125}});
+ Eigen::Tensor<double, 2> b = a.cast<double>().pow(1.0 / 3.0);
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ =>
+ a
+ 0 1 8
+ 27 64 125
+
+ b
+ 0 1 2
+ 3 4 5
+
+### operator * (Scalar scale)
+
+Multiplies all the coefficients of the input tensor by the provided scale.
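+
+A one-line sketch (assuming a small float tensor):
+
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setConstant(2.0f);
+    // Every coefficient of b is 2 * 3 = 6.
+    Eigen::Tensor<float, 2> b = a * 3.0f;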
+
+### cwiseMax(Scalar threshold)
+TODO
+
+### cwiseMin(Scalar threshold)
+TODO
+
+### unaryExpr(const CustomUnaryOp& func)
+TODO
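+
+The three operations above are not yet documented here. As a rough sketch of
+typical usage, bounding coefficients against a scalar threshold and applying a
+custom coefficient-wise functor (the functor below is purely illustrative):
+
+    // Illustrative custom functor for unaryExpr(): maps each coefficient x to x + 0.5.
+    struct AddHalf {
+      float operator()(float x) const { return x + 0.5f; }
+    };
+
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setRandom();
+    // Bound every coefficient from below at 0 and from above at 0.5.
+    Eigen::Tensor<float, 2> lower_bounded = a.cwiseMax(0.0f);
+    Eigen::Tensor<float, 2> upper_bounded = a.cwiseMin(0.5f);
+    // Apply the custom functor to every coefficient.
+    Eigen::Tensor<float, 2> shifted = a.unaryExpr(AddHalf());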
+
+
+## Binary Element Wise Operations
+
+These operations take two input tensors as arguments. The 2 input tensors should
+be of the same type and dimensions. The result is a tensor of the same
+dimensions as the tensors to which they are applied, and unless otherwise
+specified it is also of the same type. The requested operations are applied to
+each pair of elements independently.
+
+### operator+(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise sums of the inputs.
+
+### operator-(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise differences of the inputs.
+
+### operator*(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise products of the inputs.
+
+### operator/(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise quotients of the inputs.
+
+This operator is not supported for integer types.
+
+### cwiseMax(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise maximums of the inputs.
+
+### cwiseMin(const OtherDerived& other)
+
+Returns a tensor of the same type and dimensions as the input tensors
+containing the coefficient wise minimums of the inputs.
+
+### Logical operators
+
+The following logical operators are supported as well:
+
+* operator&&(const OtherDerived& other)
+* operator||(const OtherDerived& other)
+* operator<(const OtherDerived& other)
+* operator<=(const OtherDerived& other)
+* operator>(const OtherDerived& other)
+* operator>=(const OtherDerived& other)
+* operator==(const OtherDerived& other)
+* operator!=(const OtherDerived& other)
+
+They all return a tensor of boolean values.
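+
+A brief sketch (assuming two small float tensors) combining an arithmetic
+operator with a comparison, which yields a boolean tensor:
+
+    Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
+    a.setConstant(1.0f);
+    b.setConstant(2.0f);
+    // Coefficient-wise sum and quotient.
+    Eigen::Tensor<float, 2> sum_ab = a + b;
+    Eigen::Tensor<float, 2> ratio = a / b;
+    // Coefficient-wise comparison: the result holds booleans.
+    Eigen::Tensor<bool, 2> mask = a < b;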
+
+
+## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor))
+
+Selection is a coefficient-wise ternary operator that is the tensor equivalent
+to the if-then-else operation.
+
+ Tensor<bool, 3> condition = ...;
+ Tensor<float, 3> then_tensor = ...;
+ Tensor<float, 3> else_tensor = ...;
+ Tensor<float, 3> result = condition.select(then_tensor, else_tensor);
+
+The 3 arguments must have the same dimensions, which will also be the
+dimensions of the result. The 'if' tensor must be of type boolean; the 'then'
+and the 'else' tensors must be of the same type, which will also be the type
+of the result.
+
+Each coefficient in the result is equal to the corresponding coefficient in the
+'then' tensor if the corresponding value in the 'if' tensor is true. If not, the
+resulting coefficient will come from the 'else' tensor.
+
+
+## Contraction
+
+Tensor *contractions* are a generalization of the matrix product to the
+multidimensional case.
+
+ // Create 2 matrices using tensors of rank 2
+ Eigen::Tensor<int, 2> a(2, 3);
+ a.setValues({{1, 2, 3}, {6, 5, 4}});
+ Eigen::Tensor<int, 2> b(3, 2);
+ b.setValues({{1, 2}, {4, 5}, {5, 6}});
+
+ // Compute the traditional matrix product
+ Eigen::array<Eigen::IndexPair<int>, 1> product_dims = { Eigen::IndexPair<int>(1, 0) };
+ Eigen::Tensor<int, 2> AB = a.contract(b, product_dims);
+
+ // Compute the product of the transpose of the matrices
+ Eigen::array<Eigen::IndexPair<int>, 1> transposed_product_dims = { Eigen::IndexPair<int>(0, 1) };
+ Eigen::Tensor<int, 2> AtBt = a.contract(b, transposed_product_dims);
+
+ // Contraction to scalar value using a double contraction.
+ // First coordinate of both tensors are contracted as well as both second
+ // coordinates, i.e., this computes the sum of the squares of the elements.
+ Eigen::array<Eigen::IndexPair<int>, 2> double_contraction_product_dims = { Eigen::IndexPair<int>(0, 0), Eigen::IndexPair<int>(1, 1) };
+ Eigen::Tensor<int, 0> AdoubleContractedA = a.contract(a, double_contraction_product_dims);
+
+ // Extracting the scalar value of the tensor contraction for further usage
+ int value = AdoubleContractedA(0);
+
+## Reduction Operations
+
+A *Reduction* operation returns a tensor with fewer dimensions than the
+original tensor. The values in the returned tensor are computed by applying a
+*reduction operator* to slices of values from the original tensor. You specify
+the dimensions along which the slices are made.
+
+The Eigen Tensor library provides a set of predefined reduction operators such
+as `maximum()` and `sum()` and lets you define additional operators by
+implementing a few methods from a reductor template.
+
+### Reduction Dimensions
+
+All reduction operations take a single parameter of type
+`<TensorType>::Dimensions` which can always be specified as an array of
+ints. These are called the "reduction dimensions." The values are the indices
+of the dimensions of the input tensor over which the reduction is done. The
+parameter can have at most as many elements as the rank of the input tensor;
+each element must be less than the tensor rank, as it indicates one of the
+dimensions to reduce.
+
+Each dimension of the input tensor should occur at most once in the reduction
+dimensions as the implementation does not remove duplicates.
+
+The order of the values in the reduction dimensions does not affect the
+results, but the code may execute faster if you list the dimensions in
+increasing order.
+
+Example: Reduction along one dimension.
+
+ // Create a tensor of 2 dimensions
+ Eigen::Tensor<int, 2> a(2, 3);
+ a.setValues({{1, 2, 3}, {6, 5, 4}});
+ // Reduce it along the second dimension (1)...
+ Eigen::array<int, 1> dims({1 /* dimension to reduce */});
+ // ...using the "maximum" operator.
+ // The result is a tensor with one dimension. The size of
+ // that dimension is the same as the first (non-reduced) dimension of a.
+ Eigen::Tensor<int, 1> b = a.maximum(dims);
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ =>
+ a
+ 1 2 3
+ 6 5 4
+
+ b
+ 3
+ 6
+
+Example: Reduction along two dimensions.
+
+ Eigen::Tensor<float, 3> a(2, 3, 4);
+ a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
+ {7.0f, 6.0f, 5.0f, 4.0f},
+ {8.0f, 9.0f, 10.0f, 11.0f}},
+ {{12.0f, 13.0f, 14.0f, 15.0f},
+ {19.0f, 18.0f, 17.0f, 16.0f},
+ {20.0f, 21.0f, 22.0f, 23.0f}}});
+ // The tensor a has 3 dimensions. We reduce along the
+ // first 2, resulting in a tensor with a single dimension
+ // of size 4 (the last dimension of a.)
+ // Note that we pass the array of reduction dimensions
+ // directly to the maximum() call.
+ Eigen::Tensor<float, 1> b =
+ a.maximum(Eigen::array<int, 2>({0, 1}));
+ cout << "b" << endl << b << endl << endl;
+ =>
+ b
+ 20
+ 21
+ 22
+ 23
+
+#### Reduction along all dimensions
+
+As a special case, if you pass no parameter to a reduction operation the
+original tensor is reduced along *all* its dimensions. The result is a
+scalar, represented as a zero-dimension tensor.
+
+ Eigen::Tensor<float, 3> a(2, 3, 4);
+ a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
+ {7.0f, 6.0f, 5.0f, 4.0f},
+ {8.0f, 9.0f, 10.0f, 11.0f}},
+ {{12.0f, 13.0f, 14.0f, 15.0f},
+ {19.0f, 18.0f, 17.0f, 16.0f},
+ {20.0f, 21.0f, 22.0f, 23.0f}}});
+ // Reduce along all dimensions using the sum() operator.
+ Eigen::Tensor<float, 0> b = a.sum();
+ cout << "b" << endl << b << endl << endl;
+ =>
+ b
+ 276
+
+
+### sum(const Dimensions& new_dims)
+### sum()
+
+Reduce a tensor using the sum() operator. The resulting values
+are the sum of the reduced values.
+
+### mean(const Dimensions& new_dims)
+### mean()
+
+Reduce a tensor using the mean() operator. The resulting values
+are the mean of the reduced values.
+
+### maximum(const Dimensions& new_dims)
+### maximum()
+
+Reduce a tensor using the maximum() operator. The resulting values are the
+largest of the reduced values.
+
+### minimum(const Dimensions& new_dims)
+### minimum()
+
+Reduce a tensor using the minimum() operator. The resulting values
+are the smallest of the reduced values.
+
+### prod(const Dimensions& new_dims)
+### prod()
+
+Reduce a tensor using the prod() operator. The resulting values
+are the product of the reduced values.
+
+### all(const Dimensions& new_dims)
+### all()
+Reduce a tensor using the all() operator. Casts the tensor to bool and then
+checks whether all elements are true. Runs through all elements rather than
+short-circuiting, so it may be significantly inefficient.
+
+### any(const Dimensions& new_dims)
+### any()
+Reduce a tensor using the any() operator. Casts the tensor to bool and then
+checks whether any element is true. Runs through all elements rather than
+short-circuiting, so it may be significantly inefficient.
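+
+A small sketch of these boolean reductions along one dimension of a float
+tensor:
+
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}});
+    // Reduce along dimension 1: are all / any entries in each row non-zero?
+    Eigen::Tensor<bool, 1> all_nonzero = a.all(Eigen::array<int, 1>({1}));
+    Eigen::Tensor<bool, 1> any_nonzero = a.any(Eigen::array<int, 1>({1}));
+    // all_nonzero is {false, true}, any_nonzero is {true, true}.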
+
+
+### reduce(const Dimensions& new_dims, const Reducer& reducer)
+
+Reduce a tensor using a user-defined reduction operator. See `SumReducer`
+in TensorFunctors.h for information on how to implement a reduction operator.
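+
+A minimal sketch of the call itself, reusing the built-in
+`Eigen::internal::SumReducer` rather than a user-defined reducer (so this is
+equivalent to `sum()` over the chosen dimensions):
+
+    Eigen::Tensor<float, 2> a(2, 3);
+    a.setRandom();
+    Eigen::array<int, 1> dims({1});
+    // Same result as a.sum(dims), spelled via the generic reduce() entry point.
+    Eigen::Tensor<float, 1> b = a.reduce(dims, Eigen::internal::SumReducer<float>());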
+
+
+## Trace
+
+A *Trace* operation returns a tensor with fewer dimensions than the original
+tensor. It returns a tensor whose elements are the sum of the elements of the
+original tensor along the main diagonal for a list of specified dimensions, the
+"trace dimensions". Similar to the `Reduction Dimensions`, the trace dimensions
+are passed as an input parameter to the operation, are of type
+`<TensorType>::Dimensions`, and have the same requirements when passed as an
+input parameter. In addition,
+the trace dimensions must have the same size.
+
+Example: Trace along 2 dimensions.
+
+ // Create a tensor of 3 dimensions
+ Eigen::Tensor<int, 3> a(2, 2, 3);
+ a.setValues({{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}});
+ // Specify the dimensions along which the trace will be computed.
+ // In this example, the trace can only be computed along the dimensions
+ // with indices 0 and 1
+ Eigen::array<int, 2> dims({0, 1});
+ // The output tensor contains all but the trace dimensions.
+ Tensor<int, 1> a_trace = a.trace(dims);
+ cout << "a_trace:" << endl;
+ cout << a_trace << endl;
+ =>
+ a_trace:
+ 11
+ 13
+ 15
+
+
+### trace(const Dimensions& new_dims)
+### trace()
+
+As a special case, if no parameter is passed to the operation, trace is computed
+along *all* dimensions of the input tensor.
+
+Example: Trace along all dimensions.
+
+ // Create a tensor of 3 dimensions, with all dimensions having the same size.
+ Eigen::Tensor<int, 3> a(3, 3, 3);
+ a.setValues({{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}},
+ {{10, 11, 12}, {13, 14, 15}, {16, 17, 18}},
+ {{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}});
+ // Result is a zero dimension tensor
+ Tensor<int, 0> a_trace = a.trace();
+ cout << "a_trace:" << endl << a_trace << endl;
+ =>
+ a_trace:
+ 42
+
+
+## Scan Operations
+
+A *Scan* operation returns a tensor with the same dimensions as the original
+tensor. The operation performs an inclusive scan along the specified
+axis, which means it computes a running total along the axis for a given
+reduction operation.
+If the reduction operation corresponds to summation, then this computes the
+prefix sum of the tensor along the given axis.
+
+Example:
+
+ // Create a tensor of 2 dimensions
+ Eigen::Tensor<int, 2> a(2, 3);
+ a.setValues({{1, 2, 3}, {4, 5, 6}});
+ // Scan it along the second dimension (1) using summation
+ Eigen::Tensor<int, 2> b = a.cumsum(1);
+ // The result is a tensor with the same size as the input
+ cout << "a" << endl << a << endl << endl;
+ cout << "b" << endl << b << endl << endl;
+ =>
+ a
+ 1 2 3
+ 4 5 6
+
+ b
+ 1 3 6
+ 4 9 15
+
+### cumsum(const Index& axis)
+
+Perform a scan by summing consecutive entries.
+
+### cumprod(const Index& axis)
+
+Perform a scan by multiplying consecutive entries.
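+
+A short sketch, reusing the values from the cumsum example above:
+
+    Eigen::Tensor<int, 2> a(2, 3);
+    a.setValues({{1, 2, 3}, {4, 5, 6}});
+    // Running product along the second dimension (1).
+    Eigen::Tensor<int, 2> b = a.cumprod(1);
+    // b is {{1, 2, 6}, {4, 20, 120}}.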
+
+
+## Convolutions
+
+### convolve(const Kernel& kernel, const Dimensions& dims)
+
+Returns a tensor that is the output of the convolution of the input tensor with the kernel,
+along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor
+which were part of the convolution will be reduced by the formula:
+output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size).
+The dimension sizes for dimensions that were not part of the convolution will remain the same.
+Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the
+convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is
+for the last dimension).
+
+ // Compute convolution along the second and third dimension.
+ Tensor input(3, 3, 7, 11);
+ Tensor