|
| 1 | +// [[Rcpp::depends(RcppParallel)]] |
| 2 | +#include <RcppParallelSIMD.h> |
| 3 | +#include <Rcpp.h> |
| 4 | +using namespace Rcpp; |
| 5 | + |
| 6 | +// http://nt2.metascale.fr/doc/html/tutorials/processing_data_the_simd_way.html |
| 7 | +#include <boost/simd/memory/allocator.hpp> |
| 8 | +#include <boost/simd/sdk/simd/pack.hpp> |
| 9 | +#include <boost/simd/include/functions/sum.hpp> |
| 10 | +#include <boost/simd/include/functions/load.hpp> |
| 11 | +#include <boost/simd/include/functions/plus.hpp> |
| 12 | +#include <boost/simd/include/functions/multiplies.hpp> |
| 13 | + |
| 14 | +template <typename Value> |
| 15 | +Value simd_dot(Value* first1, Value* last1, Value* first2) |
| 16 | +{ |
| 17 | + using boost::simd::sum; |
| 18 | + using boost::simd::pack; |
| 19 | + |
| 20 | + typedef pack<Value> type; |
| 21 | + type tmp; |
| 22 | + |
| 23 | + // Let's consider that (last1-first1) is divisible by the size of the pack. |
| 24 | + while (first1 != last1) |
| 25 | + { |
| 26 | + // Load current values from the datasets |
| 27 | + pack<Value> x1 = boost::simd::aligned_load<Value>(first1); |
| 28 | + pack<Value> x2 = boost::simd::aligned_load<Value>(first2); |
| 29 | + |
| 30 | + // Computation |
| 31 | + tmp = tmp + x1 * x2; |
| 32 | + |
| 33 | + // Advance to the next SIMD vector |
| 34 | + first1 += type::static_size; |
| 35 | + first2 += type::static_size; |
| 36 | + } |
| 37 | + |
| 38 | + return sum(tmp); |
| 39 | +} |
| 40 | + |
| 41 | +// [[Rcpp::export]] |
| 42 | +double dot(NumericVector lhs, NumericVector rhs) |
| 43 | +{ |
| 44 | + // construct simd vectors |
| 45 | + typedef std::vector< double, boost::simd::allocator<double> > vector_t; |
| 46 | + vector_t a(lhs.begin(), lhs.end()); |
| 47 | + vector_t b(rhs.begin(), rhs.end()); |
| 48 | + |
| 49 | + // call dot function |
| 50 | + double result = simd_dot(&a[0], &a[0] + a.size(), &b[0]); |
| 51 | + |
| 52 | + return result; |
| 53 | +} |
| 54 | + |
| 55 | +/*** R |
| 56 | +n <- 1024 |
| 57 | +lhs <- rnorm(n) |
| 58 | +rhs <- rnorm(n) |
| 59 | +result <- dot(lhs, rhs) |
| 60 | +all.equal(result, sum(lhs * rhs)) |
| 61 | +
|
| 62 | +library(microbenchmark) |
| 63 | +lhs <- rnorm(n * 1000) |
| 64 | +rhs <- rnorm(n * 1000) |
| 65 | +microbenchmark( |
| 66 | + simd = dot(lhs, rhs), |
| 67 | + R = sum(lhs * rhs) |
| 68 | +) |
| 69 | +*/ |
0 commit comments