@@ -12,58 +12,49 @@ using namespace Rcpp;
1212#include < boost/simd/include/functions/multiplies.hpp>
1313
1414template <typename Value>
15- Value simd_dot (Value* first1, Value* last1, Value* first2)
15+ Value simd_dot_impl (Value* first1, Value* last1, Value* first2)
1616{
1717 using boost::simd::sum;
1818 using boost::simd::pack;
1919
20- typedef pack<Value> type ;
21- type tmp;
20+ typedef pack<Value> packed_type ;
21+ packed_type tmp;
2222
2323 // Let's consider that (last1-first1) is divisible by the size of the pack.
2424 while (first1 != last1)
2525 {
2626 // Load current values from the datasets
27- pack<Value> x1 = boost::simd::aligned_load<Value >(first1);
28- pack<Value> x2 = boost::simd::aligned_load<Value >(first2);
27+ packed_type x1 = boost::simd::load<packed_type >(first1);
28+ packed_type x2 = boost::simd::load<packed_type >(first2);
2929
3030 // Computation
3131 tmp = tmp + x1 * x2;
3232
3333 // Advance to the next SIMD vector
34- first1 += type ::static_size;
35- first2 += type ::static_size;
34+ first1 += packed_type ::static_size;
35+ first2 += packed_type ::static_size;
3636 }
3737
3838 return sum (tmp);
3939}
4040
4141// [[Rcpp::export]]
42- double dot (NumericVector lhs, NumericVector rhs)
42+ double simd_dot (NumericVector lhs, NumericVector rhs)
4343{
44- // construct simd vectors
45- typedef std::vector< double , boost::simd::allocator<double > > vector_t ;
46- vector_t a (lhs.begin (), lhs.end ());
47- vector_t b (rhs.begin (), rhs.end ());
48-
49- // call dot function
50- double result = simd_dot (&a[0 ], &a[0 ] + a.size (), &b[0 ]);
51-
52- return result;
44+ return simd_dot_impl (
45+ REAL (lhs),
46+ REAL (lhs) + lhs.size (),
47+ REAL (rhs)
48+ );
5349}
5450
/*** R
# Attach microbenchmark explicitly: the chunk calls microbenchmark() below.
library(microbenchmark)

lhs <- rnorm(1024 * 1000)
rhs <- rnorm(1024 * 1000)

# The SIMD result should agree with R's own computation.
all.equal(simd_dot(lhs, rhs), sum(lhs * rhs))

# Compare the running time of the SIMD implementation with plain R.
microbenchmark(
  simd = simd_dot(lhs, rhs),
  R = sum(lhs * rhs)
)
*/
0 commit comments