@@ -116,19 +116,20 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
116116{
117117 size_t n_groups = ceiling_quotient (n_elems, n_wi * wg_size);
118118
119- sycl::event inc_scan_phase1_ev = exec_q.submit ([&](sycl::handler &cgh) {
120- cgh.depends_on (depends);
119+ const sycl::event &inc_scan_phase1_ev =
120+ exec_q.submit ([&](sycl::handler &cgh) {
121+ cgh.depends_on (depends);
121122
122- using slmT = sycl::local_accessor<size_t , 1 >;
123+ using slmT = sycl::local_accessor<size_t , 1 >;
123124
124- auto lws = sycl::range<1 >(wg_size);
125- auto gws = sycl::range<1 >(n_groups * wg_size);
125+ auto lws = sycl::range<1 >(wg_size);
126+ auto gws = sycl::range<1 >(n_groups * wg_size);
126127
127- slmT slm_iscan_tmp (lws, cgh);
128+ slmT slm_iscan_tmp (lws, cgh);
128129
129130 cgh.parallel_for <class inclusive_scan_rec_local_scan_krn <
130131 inputT, outputT, n_wi, IndexerT, decltype (transformer)>>(
131- sycl::nd_range<1 >(gws, lws), [=](sycl::nd_item<1 > it)
132+ sycl::nd_range<1 >(gws, lws), [=, slm_iscan_tmp = std::move (slm_iscan_tmp) ](sycl::nd_item<1 > it)
132133 {
133134 auto chunk_gid = it.get_global_id (0 );
134135 auto lid = it.get_local_id (0 );
@@ -172,7 +173,7 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
172173 output[i + m_wi] = local_isum[m_wi];
173174 }
174175 });
175- });
176+ });
176177
177178 sycl::event out_event = inc_scan_phase1_ev;
178179 if (n_groups > 1 ) {
@@ -203,11 +204,11 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
203204
204205 sycl::event e4 = exec_q.submit ([&](sycl::handler &cgh) {
205206 cgh.depends_on (e3 );
206- auto ctx = exec_q.get_context ();
207+ const auto & ctx = exec_q.get_context ();
207208 cgh.host_task ([ctx, temp]() { sycl::free (temp, ctx); });
208209 });
209210
210- out_event = e4 ;
211+ out_event = std::move ( e4 ) ;
211212 }
212213
213214 return out_event;
@@ -235,7 +236,7 @@ size_t accumulate_contig_impl(sycl::queue &q,
235236 NoOpIndexer flat_indexer{};
236237 transformerT non_zero_indicator{};
237238
238- sycl::event comp_ev =
239+ const sycl::event & comp_ev =
239240 inclusive_scan_rec<maskT, cumsumT, n_wi, decltype (flat_indexer),
240241 decltype (non_zero_indicator)>(
241242 q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0 , 1 ,
@@ -321,7 +322,7 @@ size_t accumulate_strided_impl(sycl::queue &q,
321322 StridedIndexer strided_indexer{nd, 0 , shape_strides};
322323 transformerT non_zero_indicator{};
323324
324- sycl::event comp_ev =
325+ const sycl::event & comp_ev =
325326 inclusive_scan_rec<maskT, cumsumT, n_wi, decltype (strided_indexer),
326327 decltype (non_zero_indicator)>(
327328 q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0 , 1 ,
0 commit comments