Skip to content

Commit b21daa8

Browse files
committed
Merge pull request #20 from kevinushey/feature/boost-simd
add in boost.SIMD
2 parents d1adc4e + a8febf1 commit b21daa8

File tree

3,724 files changed

+129280
-6
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,724 files changed

+129280
-6
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
^README.md
99
^doc$
1010
^src/.*\.o$
11+
^examples/

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@
44
.DS_Store
55
src-i386/
66
src-x64/
7+
.Rprofile
8+
inst/doc

DESCRIPTION

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@ Description: High level functions for doing parallel programming with 'Rcpp'.
1818
For example, the parallelFor() function can be used to convert the work of
1919
a standard serial "for" loop into a parallel one and the parallelReduce()
2020
function can be used for accumulating aggregate or other values.
21-
Suggests: Rcpp, RUnit
22-
LinkingTo: BH
21+
Suggests:
22+
Rcpp,
23+
RUnit,
24+
knitr,
25+
rmarkdown
26+
LinkingTo: BH (>= 1.60.0-1)
2327
SystemRequirements: GNU make, Windows: cmd.exe and cscript.exe, Solaris: g++ is required
2428
License: GPL-2
2529
URL: http://rcppcore.github.io/RcppParallel, https://github.com/RcppCore/RcppParallel
2630
Collate:
2731
'build.R'
2832
'hooks.R'
2933
'options.R'
30-
34+
VignetteBuilder: knitr

R/build.R

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,25 @@ RcppParallelLibs <- function() {
1717
inlineCxxPlugin <- function() {
1818
list(
1919
env = list(
20-
PKG_CXXFLAGS = tbbCxxFlags(),
20+
PKG_CXXFLAGS = paste(tbbCxxFlags(), mtuneFlags()),
2121
PKG_LIBS = tbbLdFlags()
2222
),
2323
includes = "#include <RcppParallel.h>",
24-
LinkingTo = "RcppParallel",
25-
body = function( x ) x,
24+
LinkingTo = c("RcppParallel", "BH"),
25+
body = function(x) x,
2626
Depends = "RcppParallel"
2727
)
2828
}
2929

30+
# Choose the '-mtune' compiler flag that the inline plugin appends to
# PKG_CXXFLAGS, based on the operating system reported by Sys.info().
# Returns a length-one character vector; the empty string is returned for
# any system that is not explicitly listed.
mtuneFlags <- function() {
  sysname <- Sys.info()[["sysname"]]

  if (identical(sysname, "Linux"))
    return("-mtune=native")

  if (sysname %in% c("Darwin", "Windows"))
    return("-mtune=core2")

  ""
}
38+
3039
tbbCxxFlags <- function() {
3140
flags <- "$(CXX1XSTD)"
3241
if (Sys.info()['sysname'] == "Windows")

TODO

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Boost.SIMD
2+
==========
3+
4+
- Make it easy for client packages to use Boost.SIMD (plugin? Also need LinkingTo: BH?)
5+
- Enable 'parallelFor', 'parallelReduce'-style calls to SIMD
6+
- See 'Wizard' (http://www.evanmiller.org/) for inspiration
7+
- Need to think about NA handling (an idiom that handles NA values)
8+
- Setting of compiler flags for CRAN
9+
- Don't overlap with Armadillo, Eigen
10+

examples/boost-simd-abssum.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// [[Rcpp::depends(RcppParallel)]]
2+
#include <RcppParallelSIMD.h>
3+
#include <Rcpp.h>
4+
using namespace Rcpp;
5+
6+
#include <boost/simd/sdk/simd/pack.hpp>
7+
#include <boost/simd/memory/allocator.hpp>
8+
#include <boost/simd/include/functions/sum.hpp>
9+
#include <boost/simd/include/functions/load.hpp>
10+
#include <boost/simd/include/functions/plus.hpp>
11+
#include <boost/simd/include/functions/multiplies.hpp>
12+
#include <boost/simd/include/functions/abs.hpp>
13+
14+
// [[Rcpp::export]]
15+
// Sum of absolute values of 'x', computed with Boost.SIMD packs.
//
// The input is first copied into a std::vector backed by
// boost::simd::allocator. Whole packs are then accumulated with SIMD
// operations, and any trailing elements that do not fill a complete pack are
// added with a scalar loop, so the length of 'x' does not need to be a
// multiple of the pack width. An empty input yields 0.
double simd_abssum(NumericVector x)
{
  using boost::simd::pack;
  using boost::simd::load;

  typedef std::vector< double, boost::simd::allocator<double> > vector_type;
  typedef pack<double> packed_type;

  vector_type data(x.begin(), x.end());

  // Nothing to sum; this also avoids taking the address of data[0] on an
  // empty vector.
  if (data.empty())
    return 0.0;

  const vector_type::size_type width = packed_type::static_size;

  double* it = &data[0];
  double* const end = it + data.size();

  // One past the last element that belongs to a complete pack.
  double* const packed_end = it + (data.size() / width) * width;

  // Start the accumulator from zero explicitly rather than relying on the
  // contents of a default-constructed pack.
  packed_type packed(0.0);

  for (; it != packed_end; it += width)
  {
    packed_type loaded = load<packed_type>(it);
    packed = packed + boost::simd::abs(loaded);
  }

  double result = boost::simd::sum(packed);

  // Scalar tail: the remaining (fewer than 'width') elements.
  for (; it != end; ++it)
  {
    const double value = *it;
    result += (value < 0.0) ? -value : value;
  }

  return result;
}
39+
40+
/*** R
41+
n <- 1024 * 1000
42+
x <- rnorm(n)
43+
44+
library(microbenchmark)
45+
microbenchmark(
46+
R = sum(abs(x)),
47+
simd = simd_abssum(x)
48+
)
49+
*/

examples/boost-simd-accumulate.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// [[Rcpp::depends(RcppParallel)]]
2+
#include <RcppParallelSIMD.h>
3+
#include <Rcpp.h>
4+
using namespace Rcpp;
5+
6+
#include <boost/simd/sdk/simd/algorithm.hpp>
7+
8+
// Binary addition functor. The call operator is a template, so the same
// object can be applied to any operand type that supports operator+; both
// operands and the result share that one type.
struct plus
{
  template <typename Value>
  Value operator()(const Value& a, const Value& b) const
  {
    return a + b;
  }
};
16+
17+
// [[Rcpp::export]]
18+
// Sum the elements of 'x' by handing the 'plus' functor to
// boost::simd::accumulate, starting from an initial value of 0.0.
double simd_sum(NumericVector x)
{
  plus add;
  const double initial = 0.0;
  return boost::simd::accumulate(x.begin(), x.end(), initial, add);
}
22+
23+
// [[Rcpp::export]]
24+
// Scalar counterpart of simd_sum: the same 'plus' functor and the same
// initial value of 0.0, but driven by std::accumulate.
double cpp_sum(NumericVector x)
{
  plus add;
  const double initial = 0.0;
  return std::accumulate(x.begin(), x.end(), initial, add);
}
28+
29+
/***R
30+
n <- 1024 * 1000
31+
data <- rnorm(n)
32+
simd_sum(data)
33+
microbenchmark::microbenchmark(
34+
R = sum(data),
35+
simd = simd_sum(data),
36+
cpp = cpp_sum(data)
37+
)
38+
*/
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// [[Rcpp::depends(RcppParallel)]]
2+
#include <RcppParallelSIMD.h>
3+
#include <Rcpp.h>
4+
using namespace Rcpp;
5+
6+
#include <boost/simd/sdk/simd/extensions/meta/tags.hpp>
7+
#include <boost/simd/sdk/config/is_supported.hpp>
8+
#include <boost/config.hpp>
9+
10+
// Try playing around with the '-march' flag to see what support is detected.
11+
// Note that most modern processors will implement instructions up to SSE3,
12+
// but certain AVX and FMA instructions are only available on newer CPUs.
13+
14+
// [[Rcpp::export]]
15+
void simd_capabilities() {
16+
17+
using boost::simd::is_supported;
18+
using namespace boost::simd::tag;
19+
20+
std::cout << "SIMD Capabilities\n"
21+
<< "=================\n\n";
22+
23+
std::cout << "AVX2: " << is_supported<avx2_>() << "\n"
24+
<< "AVX: " << is_supported<avx_>() << "\n"
25+
<< "FMA4: " << is_supported<fma4_>() << "\n"
26+
<< "FMA3: " << is_supported<fma3_>() << "\n"
27+
<< "SSE4a: " << is_supported<sse4a_>() << "\n"
28+
<< "SSE4_2: " << is_supported<sse4_2_>() << "\n"
29+
<< "SSE4_1: " << is_supported<sse4_1_>() << "\n"
30+
<< "SSE3: " << is_supported<sse3_>() << "\n"
31+
<< "SSE2: " << is_supported<sse2_>() << "\n"
32+
<< "SSE: " << is_supported<sse_>() << "\n";
33+
}
34+
35+
/***R
36+
simd_capabilities()
37+
*/

examples/boost-simd-dot.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// [[Rcpp::depends(RcppParallel)]]
2+
#include <RcppParallelSIMD.h>
3+
#include <Rcpp.h>
4+
using namespace Rcpp;
5+
6+
// http://nt2.metascale.fr/doc/html/tutorials/processing_data_the_simd_way.html
7+
#include <boost/simd/memory/allocator.hpp>
8+
#include <boost/simd/sdk/simd/pack.hpp>
9+
#include <boost/simd/include/functions/sum.hpp>
10+
#include <boost/simd/include/functions/load.hpp>
11+
#include <boost/simd/include/functions/plus.hpp>
12+
#include <boost/simd/include/functions/multiplies.hpp>
13+
14+
template <typename Value>
15+
Value simd_dot(Value* first1, Value* last1, Value* first2)
16+
{
17+
using boost::simd::sum;
18+
using boost::simd::pack;
19+
20+
typedef pack<Value> type;
21+
type tmp;
22+
23+
// Let's consider that (last1-first1) is divisible by the size of the pack.
24+
while (first1 != last1)
25+
{
26+
// Load current values from the datasets
27+
pack<Value> x1 = boost::simd::aligned_load<Value>(first1);
28+
pack<Value> x2 = boost::simd::aligned_load<Value>(first2);
29+
30+
// Computation
31+
tmp = tmp + x1 * x2;
32+
33+
// Advance to the next SIMD vector
34+
first1 += type::static_size;
35+
first2 += type::static_size;
36+
}
37+
38+
return sum(tmp);
39+
}
40+
41+
// [[Rcpp::export]]
42+
// Dot product of two numeric vectors, exported to R.
//
// Both inputs are copied into std::vector objects backed by
// boost::simd::allocator and then passed to simd_dot. The vectors must have
// the same length, otherwise an R error is raised; two empty vectors give 0.
double dot(NumericVector lhs, NumericVector rhs)
{
  // simd_dot reads as many elements from the second range as the first
  // range holds, so a shorter 'rhs' would be read past its end.
  if (lhs.size() != rhs.size())
    stop("'lhs' and 'rhs' must have the same length");

  // Avoid taking the address of element 0 of an empty vector below.
  if (lhs.size() == 0)
    return 0.0;

  // construct simd vectors
  typedef std::vector< double, boost::simd::allocator<double> > vector_t;
  vector_t a(lhs.begin(), lhs.end());
  vector_t b(rhs.begin(), rhs.end());

  // call dot function
  return simd_dot(&a[0], &a[0] + a.size(), &b[0]);
}
54+
55+
/*** R
56+
n <- 1024
57+
lhs <- rnorm(n)
58+
rhs <- rnorm(n)
59+
result <- dot(lhs, rhs)
60+
all.equal(result, sum(lhs * rhs))
61+
62+
library(microbenchmark)
63+
lhs <- rnorm(n * 1000)
64+
rhs <- rnorm(n * 1000)
65+
microbenchmark(
66+
simd = dot(lhs, rhs),
67+
R = sum(lhs * rhs)
68+
)
69+
*/
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// [[Rcpp::depends(RcppParallel)]]
2+
#include <RcppParallelSIMD.h>
3+
#include <Rcpp.h>
4+
using namespace Rcpp;
5+
6+
// http://nt2.metascale.fr/doc/html/tutorials/simd_hello_world.html
7+
#include <boost/simd/sdk/simd/pack.hpp>
8+
#include <boost/simd/sdk/simd/io.hpp>
9+
#include <boost/simd/include/functions/splat.hpp>
10+
#include <boost/simd/include/functions/plus.hpp>
11+
#include <boost/simd/include/functions/multiplies.hpp>
12+
#include <iostream>
13+
14+
// [[Rcpp::export]]
15+
void HelloWorld()
16+
{
17+
typedef boost::simd::pack<float> p_t;
18+
19+
p_t res;
20+
p_t u(10);
21+
p_t r = boost::simd::splat<p_t>(11);
22+
23+
res = (u + r) * 2.f;
24+
25+
Rcout << res << std::endl;
26+
}
27+
28+
/*** R
29+
HelloWorld()
30+
*/

0 commit comments

Comments
 (0)