|
| 1 | +// [[Rcpp::depends(RcppParallel)]] |
| 2 | +#include <RcppParallel.h> |
| 3 | +#include <Rcpp.h> |
| 4 | +using namespace Rcpp; |
| 5 | + |
| 6 | +// http://nt2.metascale.fr/doc/html/tutorials/processing_data_the_simd_way.html |
| 7 | +#include <boost/simd/memory/allocator.hpp> |
| 8 | +#include <boost/simd/sdk/simd/pack.hpp> |
| 9 | +#include <boost/simd/include/functions/sum.hpp> |
| 10 | +#include <boost/simd/include/functions/load.hpp> |
| 11 | +#include <boost/simd/include/functions/plus.hpp> |
| 12 | +#include <boost/simd/include/functions/multiplies.hpp> |
| 13 | + |
| 14 | +template <typename Value> |
| 15 | +Value simd_dot(Value* first1, Value* last1, Value* first2) |
| 16 | +{ |
| 17 | + using boost::simd::sum; |
| 18 | + using boost::simd::pack; |
| 19 | + using boost::simd::load; |
| 20 | + |
| 21 | + typedef pack<Value> type; |
| 22 | + type tmp; |
| 23 | + |
| 24 | + // Let's consider that (last1-first1) is divisible by the size of the pack. |
| 25 | + while (first1 != last1) |
| 26 | + { |
| 27 | + // Load current values from the datasets |
| 28 | + pack<Value> x1 = load<type>(first1); |
| 29 | + pack<Value> x2 = load<type>(first2); |
| 30 | + |
| 31 | + // Computation |
| 32 | + tmp = tmp + x1 * x2; |
| 33 | + |
| 34 | + // Advance to the next SIMD vector |
| 35 | + first1 += type::static_size; |
| 36 | + first2 += type::static_size; |
| 37 | + } |
| 38 | + |
| 39 | + return sum(tmp); |
| 40 | +} |
| 41 | + |
| 42 | +// [[Rcpp::export]] |
| 43 | +double dot(NumericVector lhs, NumericVector rhs) |
| 44 | +{ |
| 45 | + // construct simd vectors |
| 46 | + typedef std::vector< double, boost::simd::allocator<double> > vector_t; |
| 47 | + vector_t a(lhs.begin(), lhs.end()); |
| 48 | + vector_t b(rhs.begin(), rhs.end()); |
| 49 | + |
| 50 | + // call dot function |
| 51 | + double result = simd_dot( |
| 52 | + &a[0], |
| 53 | + &a[0] + a.size(), |
| 54 | + &b[0] |
| 55 | + ); |
| 56 | + |
| 57 | + return result; |
| 58 | +} |
| 59 | + |
| 60 | +/*** R |
| 61 | +lhs <- rnorm(10) |
| 62 | +rhs <- rnorm(10) |
| 63 | +result <- dot(lhs, rhs) |
| 64 | +all.equal(result, sum(lhs * rhs)) |
| 65 | +*/ |
0 commit comments