33 * @author Kevin Ushey
44 * @license GPL (>= 2)
55 * @tags simd parallel
6- * @summary Demonstrates how RcppParallel could be used to implement a SIMD-aware variance.
6+ * @summary Demonstrates how RcppParallel could be used to compute
7+ * the variance, using SIMD instructions.
78 */
89
910// [[Rcpp::depends(RcppParallel)]]
@@ -15,10 +16,10 @@ using namespace RcppParallel;
1516using namespace Rcpp ;
1617
1718/* *
18- * This article illustrates how the 'simdMapReduce()'
19- * function can be used to compute the variance for a vector
20- * of numbers . First, let's look at the R code one might
21- * write to compute the variance.
19+ * This article illustrates how we might take advantage of
20+ * `Boost.SIMD` in computing the variance for a vector of
21+ * numbers. First, let's look at the R code one might write
22+ * to compute the variance.
2223 */
2324
2425/* ** R
@@ -34,66 +35,66 @@ sum((x - mean(x))^2) / (length(x) - 1)
3435 * 3. Sum the deviations about the mean,
3536 * 4. Divide the summation by the length minus one.
3637 *
37- * Naively, we could imagine writing a 'transform()' for
38- * step 2, and an 'accumulate()' for step 3. However, this
39- * is a bit inefficient as the transform would require
40- * allocating a whole new vector, with the same length as
41- * our initial vector. It'd be much better if we could
42- * combine our map and reduction operations, as we could
43- * then avoid that large allocation. The 'simdMapReduce()'
44- * function provides an interface for doing just that, with
45- * a SIMD pack providing the 'buffer' for the mapped
46- * operations.
47- *
48- * The 'simdMapReduce()' function expects a class providing
49- * templated 'map()' and 'reduce()' functions. The 'map()'
50- * operation represents the transformation we wish to make
51- * on the input data (in this case, the square of the
52- * deviation from the mean); while the 'reduce()' operation
53- * represents how the transformed result should be combined
54- * (in this case, as a summation of said deviations).
38+ * Naively, we could imagine writing a 'transform()' for
39+ * step 2, and an 'accumulate()' for step 3. However, this
40+ * is a bit inefficient as the transform would require
41+ * allocating a whole new vector, with the same length as
42+ * our initial vector. When neither 'accumulate()' nor
43+ * 'transform()' seem to be a good fit, we can fall back to
44+ * 'for_each()'. We can pass a stateful functor to handle
45+ * accumulation of the transformed results.
5546 *
5647 * Let's write a class that encapsulates this 'sum of
5748 * squares' map-reduction operation.
5849 */
5950
60- class SumOfSquaresMapReducer
51+ class SumOfSquaresAccumulator
6152{
6253public:
6354
64- // Since the 'map' operation requires knowledge of the mean,
65- // we ensure our class must be constructed with a mean value.
66- explicit SumOfSquaresMapReducer (double mean)
67- : mean_(mean)
55+ // Since the 'map' operation requires knowledge of the
56+ // mean, we ensure our class must be constructed with a
57+ // mean value. We will also hold the final result within
58+ // the 'result_' variable.
59+ explicit SumOfSquaresAccumulator (double mean)
60+ : mean_(mean), result_(0.0 ), pack_(0.0 )
6861 {}
6962
70- // The 'map()' operation generates a squared deviation,
71- // and updates a buffer. By making this a template,
72- // specializations for SIMD packs, versus scalar values,
73- // can be generated as appropriate. We use the
74- // 'boost::simd::sqr()' function to compute the square.
75- template <typename T>
76- void map (const T& data, T* pBuffer)
63+ // We need to provide two call operators: one to handle
64+ // SIMD packs, and one to handle scalar values. We do this
65+ // as we want to accumulate SIMD results in a SIMD data
66+ // structure, and scalar results in a scalar data object.
67+ //
68+ // We _could_ just accumulate all our results in a single
69+ // 'double', but this is actually less efficient -- it
70+ // pays to use packed structures when possible.
71+ void operator ()(double data)
7772 {
78- *pBuffer += boost::simd::sqr (data - mean_);
73+ double diff = data - mean_;
74+ result_ += diff * diff;
7975 }
8076
81- // The 'reduce()' operation collapses the accumulated
82- // SIMD pack structure into a scalar value, and updates
83- // the buffer. In this case, we just need to add up the
84- // values in that pack, and add it to the buffer.
85- template <typename T, typename U>
86- void reduce (const T& data, U* pBuffer)
77+ void operator ()(const boost::simd::pack<double >& data)
8778 {
88- *pBuffer += boost::simd::sum (data);
79+ pack_ += boost::simd::sqr (data - mean_);
80+ }
81+
82+ // Provide a 'conversion to double' operator -- this lets
83+ // us easily extract the resulting value after computation.
84+ // This is also where we combine our scalar, and packed,
85+ // representations of the data.
86+ operator double () const {
87+ return result_ + boost::simd::sum (pack_);
8988 }
9089
9190private:
9291 double mean_;
92+ double result_;
93+ boost::simd::pack<double > pack_;
9394};
9495
9596/* *
96- * Now that we have our 'SumOfSquares' class defined, we can
97+ * Now that we have our accumulator class defined, we can
9798 * use it to compute the variance. We'll call our function
9899 * 'simdVar()', and export it using Rcpp attributes in the
99100 * usual way.
@@ -108,15 +109,18 @@ double simdVar(NumericVector data) {
108109
109110 // First, compute the mean as we'll need that for
110111 // computation of the sum of squares.
111- double total = simd::accumulate (data.begin (), data.end (), 0.0 , simd::ops::plus ());
112+ double total = simd::accumulate (data.begin (),
113+ data.end (),
114+ 0.0 ,
115+ simd::ops::plus ());
116+
112117 std::size_t n = data.size ();
113118 double mean = total / n;
114119
115- // Construct our map-reducer with the generated mean.
116- auto reducer = SumOfSquaresMapReducer (mean);
117-
118- // Use it to compute the sum of squares.
119- double ssq = simdMapReduce (data.begin (), data.end (), 0.0 , reducer);
120+ // Use our accumulator to compute the sum of squares.
121+ double ssq = simd::for_each (data.begin (),
122+ data.end (),
123+ SumOfSquaresAccumulator (mean));
120124
121125 // Divide by 'n - 1', and we're done!
122126 return ssq / (n - 1 );
0 commit comments