diff --git a/.clang-tidy b/.clang-tidy index 72e0de1a1fc..1a4fb50ea38 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -22,7 +22,16 @@ Checks: - '-*' - 'performance-*' - '-performance-noexcept-swap' + # TODO(jfaibussowit): re-enable for CCCL 4.0 - '-performance-enum-size' + # The majority of thrust/CUB require value semantics because they may be used on device, + # where converting to const-ref is detrimental to performance because of possible + # register spilling. Also, for device functions, the overwhelming majority of types are + # PODs or trivially copyable, so moving them (another potential way to fix this) gives + # no additional perf improvement and only leads to more verbosity (not to mention + # compile-time instantiating all those move templates). + # + # All this is to say it probably is not worth the hassle of enabling this. - '-performance-unnecessary-value-param' # END REMOVE ME # HICPP is 99% aliased to other checks (mostly modernize-* and bugprone-*). We don't diff --git a/c/parallel/test/test_util.h b/c/parallel/test/test_util.h index 2380415d3fb..3bbb0dd367d 100644 --- a/c/parallel/test/test_util.h +++ b/c/parallel/test/test_util.h @@ -917,7 +917,8 @@ struct name_source_t }; template -iterator_t make_iterator(name_source_t state, operation_t advance, operation_t dereference) +iterator_t +make_iterator(const name_source_t& state, const operation_t& advance, const operation_t& dereference) { iterator_t it; it.state_name = state.name; @@ -977,7 +978,7 @@ inline std::tuple make_random_access_iter template iterator_t> make_random_access_iterator( - iterator_kind kind, std::string_view value_type, std::string prefix = "", std::string transform = "") + iterator_kind kind, std::string_view value_type, std::string_view prefix = "", std::string_view transform = "") { std::string iterator_state_name = std::format("{0}state_t", prefix); std::string advance_fn_name = std::format("{0}advance", prefix); diff --git a/cub/.clang-tidy b/cub/.clang-tidy index 706ce606e43..914c638ca93 100644 --- a/cub/.clang-tidy +++ b/cub/.clang-tidy @@ -1,21 +1,5 @@ --- -Checks: - # TODO: BEGIN REMOVE ME - - '-*' - - 'performance-implicit-conversion-in-loop' - # END REMOVE ME - # 'modernize-*, - # -modernize-use-equals-default, - # -modernize-concat-nested-namespaces, - # -modernize-use-trailing-return-type' - - # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly) - # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code) - # -modernize-use-trailing-return-type # just a preference - -WarningsAsErrors: '' -HeaderFilterRegex: '' -FormatStyle: none +InheritParentConfig: true CheckOptions: - key: modernize-loop-convert.MaxCopySize value: '16' diff --git a/cudax/test/execution/common/checked_receiver.cuh b/cudax/test/execution/common/checked_receiver.cuh index 36a25ea76bc..07c38d2e445 100644 --- a/cudax/test/execution/common/checked_receiver.cuh +++ b/cudax/test/execution/common/checked_receiver.cuh @@ -29,7 +29,7 @@ struct checked_value_receiver using receiver_concept = cudax_async::receiver_t; _CCCL_HOST_DEVICE checked_value_receiver(Values... values) - : _values{values...} + : _values{::cuda::std::move(values)...} {} _CCCL_HOST_DEVICE checked_value_receiver(checked_value_receiver&& other) noexcept @@ -106,7 +106,7 @@ struct checked_error_receiver } template - _CCCL_HOST_DEVICE void set_error(Ty ty) && noexcept + _CCCL_HOST_DEVICE void set_error(const Ty& ty) && noexcept { if constexpr (::cuda::std::is_same_v) { diff --git a/cudax/test/execution/test_bulk.cu b/cudax/test/execution/test_bulk.cu index 72ffaf4f7c0..ecf303c60cd 100644 --- a/cudax/test/execution/test_bulk.cu +++ b/cudax/test/execution/test_bulk.cu @@ -147,7 +147,7 @@ void bulk_keeps_values_type_from_input_sender() check_value_types>(ex::just(4.2) // | ex::bulk(ex::par, n, [] _CCCL_HOST_DEVICE(int, double) {})); check_value_types>(ex::just(4.2, string{}) // - | ex::bulk(ex::par, n, [] _CCCL_HOST_DEVICE(int, double, string) {})); + | ex::bulk(ex::par, n, [] _CCCL_HOST_DEVICE(int, double, const string&) {})); } void bulk_chunked_keeps_values_type_from_input_sender() @@ -158,7 +158,7 @@ void bulk_chunked_keeps_values_type_from_input_sender() check_value_types>(ex::just(4.2) // | ex::bulk_chunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, int, double) {})); check_value_types>( - ex::just(4.2, string{}) | ex::bulk_chunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, int, double, string) {})); + ex::just(4.2, string{}) | ex::bulk_chunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, int, double, const string&) {})); } void bulk_unchunked_keeps_values_type_from_input_sender() @@ -170,7 +170,7 @@ void bulk_unchunked_keeps_values_type_from_input_sender() | ex::bulk_unchunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, double) {})); check_value_types>( ex::just(4.2, string{}) // - | ex::bulk_unchunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, double, string) {})); + | ex::bulk_unchunked(ex::par, n, [] _CCCL_HOST_DEVICE(int, double, const string&) {})); } void bulk_keeps_error_types_from_input_sender() diff --git a/cudax/test/execution/test_then.cu b/cudax/test/execution/test_then.cu index 0efe8e68dd1..e44a5e1ae5e 100644 --- a/cudax/test/execution/test_then.cu +++ b/cudax/test/execution/test_then.cu @@ -268,7 +268,7 @@ C2H_TEST("then can be customized early", "[adaptors][then]") { // The customization will return a different value dummy_scheduler sched; - auto snd = ex::just(string{"hello"}) | ex::continues_on(sched) | ex::then([](string x) { + auto snd = ex::just(string{"hello"}) | ex::continues_on(sched) | ex::then([](const string& x) { return x + ", world"; }); wait_for_value(std::move(snd), string{"ciao"}); @@ -279,7 +279,7 @@ C2H_TEST("then can be customized late", "[adaptors][then]") // The customization will return a different value dummy_scheduler sched; auto snd = ex::just(string{"hello"}) - | ex::on(sched, ex::then([](string x) { + | ex::on(sched, ex::then([](const string& x) { return x + ", world"; })) | ex::write_env(ex::prop{ex::get_scheduler, dummy_scheduler()}); diff --git a/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_binary.cu b/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_binary.cu index af3ee617ea7..7b1c2370fd7 100644 --- a/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_binary.cu +++ b/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_binary.cu @@ -43,7 +43,7 @@ inline constexpr int size = 1000; template -void test_transform_reduce(const Policy policy, const thrust::device_vector& Input1, Iter input2) +void test_transform_reduce(const Policy& policy, const thrust::device_vector& Input1, Iter input2) { // N * (N + 1) / 2 for the first N integrals // 0 for multiplying by 1 diff --git a/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_unary.cu b/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_unary.cu index e51121b6c75..51f6f5f995a 100644 --- a/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_unary.cu +++ b/libcudacxx/test/libcudacxx/std/numerics/numeric.ops/transform.reduce/pstl_transform_reduce_unary.cu @@ -45,7 +45,7 @@ struct plus_one }; template -void test_transform_reduce(const Policy policy, Iter input1) +void test_transform_reduce(const Policy& policy, Iter input1) { // N * (N + 1) / 2 for the first N integrals // N for plus_one diff --git a/nvbench_helper/nvbench_helper/nvbench_helper.cuh b/nvbench_helper/nvbench_helper/nvbench_helper.cuh index 344c261bf8c..517e2ca2652 100644 --- a/nvbench_helper/nvbench_helper/nvbench_helper.cuh +++ b/nvbench_helper/nvbench_helper/nvbench_helper.cuh @@ -182,7 +182,7 @@ NVBENCH_DECLARE_TYPE_STRINGS(bit_entropy, "BE", "bit entropy"); } } -[[nodiscard]] inline bit_entropy str_to_entropy(std::string str) +[[nodiscard]] inline bit_entropy str_to_entropy(const std::string& str) { if (str == "1.000") { diff --git a/thrust/testing/copy.cu b/thrust/testing/copy.cu index 73a6892d66e..6fc3b6978e5 100644 --- a/thrust/testing/copy.cu +++ b/thrust/testing/copy.cu @@ -213,7 +213,7 @@ DECLARE_VECTOR_UNITTEST(TestCopyListTo); template struct is_even { - _CCCL_HOST_DEVICE bool operator()(T x) + _CCCL_HOST_DEVICE bool operator()(const T& x) { return (x & 1) == 0; } @@ -222,7 +222,7 @@ struct is_even template struct is_true { - _CCCL_HOST_DEVICE bool operator()(T x) + _CCCL_HOST_DEVICE bool operator()(const T& x) { return x ? true : false; } @@ -231,7 +231,7 @@ struct is_true template struct mod_3 { - _CCCL_HOST_DEVICE unsigned int operator()(T x) + _CCCL_HOST_DEVICE unsigned int operator()(const T& x) { return x % 3; } diff --git a/thrust/testing/generate.cu b/thrust/testing/generate.cu index e949110c14a..d1ecf9115af 100644 --- a/thrust/testing/generate.cu +++ b/thrust/testing/generate.cu @@ -2,6 +2,8 @@ #include #include +#include + #include _CCCL_DIAG_PUSH @@ -12,9 +14,9 @@ struct return_value { T val; - return_value() {} - return_value(T v) - : val(v) + _CCCL_HOST_DEVICE return_value() {}; + _CCCL_HOST_DEVICE return_value(T v) + : val(::cuda::std::move(v)) {} _CCCL_HOST_DEVICE T operator()(void) diff --git a/thrust/testing/unittest/assertions.h b/thrust/testing/unittest/assertions.h index 37af425a77a..6f02e57f8cc 100644 --- a/thrust/testing/unittest/assertions.h +++ b/thrust/testing/unittest/assertions.h @@ -117,7 +117,7 @@ struct value_type> //// // check scalar values template -void assert_equal(T1 a, T2 b, const std::string& filename = "unknown", int lineno = -1) +void assert_equal(const T1& a, const T2& b, const std::string& filename = "unknown", int lineno = -1) { if (!(a == b)) { diff --git a/thrust/testing/unittest/special_types.h b/thrust/testing/unittest/special_types.h index 8b10a101480..7e7cbb99d21 100644 --- a/thrust/testing/unittest/special_types.h +++ b/thrust/testing/unittest/special_types.h @@ -2,6 +2,8 @@ #include +#include + #include template @@ -81,8 +83,8 @@ struct key_value {} _CCCL_HOST_DEVICE key_value(key_type k, value_type v) - : key(k) - , value(v) + : key(::cuda::std::move(k)) + , value(::cuda::std::move(v)) {} _CCCL_HOST_DEVICE bool operator<(const key_value& rhs) const diff --git a/thrust/thrust/detail/copy_if.inl b/thrust/thrust/detail/copy_if.inl index 4b02bf505dc..01131424750 100644 --- a/thrust/thrust/detail/copy_if.inl +++ b/thrust/thrust/detail/copy_if.inl @@ -16,6 +16,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -43,7 +45,8 @@ _CCCL_HOST_DEVICE OutputIterator copy_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::copy_if"); using thrust::system::detail::generic::copy_if; - return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); + return copy_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, ::cuda::std::move(pred)); } // end copy_if() _CCCL_EXEC_CHECK_DISABLE @@ -62,7 +65,13 @@ _CCCL_HOST_DEVICE OutputIterator copy_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::copy_if"); using thrust::system::detail::generic::copy_if; - return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); + return copy_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + stencil, + result, + ::cuda::std::move(pred)); } // end copy_if() template @@ -77,7 +86,7 @@ OutputIterator copy_if(InputIterator first, InputIterator last, OutputIterator r System1 system1; System2 system2; - return thrust::copy_if(select_system(system1, system2), first, last, result, pred); + return thrust::copy_if(select_system(system1, system2), first, last, result, ::cuda::std::move(pred)); } // end copy_if() template @@ -95,7 +104,8 @@ copy_if(InputIterator1 first, InputIterator1 last, InputIterator2 stencil, Outpu System2 system2; System3 system3; - return thrust::copy_if(select_system(system1, system2, system3), first, last, stencil, result, pred); + return thrust::copy_if( + select_system(system1, system2, system3), first, last, stencil, result, ::cuda::std::move(pred)); } // end copy_if() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/find.inl b/thrust/thrust/detail/find.inl index e7f64fae81b..35d55aa7a47 100644 --- a/thrust/thrust/detail/find.inl +++ b/thrust/thrust/detail/find.inl @@ -15,6 +15,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -54,7 +56,7 @@ _CCCL_HOST_DEVICE InputIterator find_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::find_if"); using thrust::system::detail::generic::find_if; - return find_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); + return find_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(pred)); } // end find_if() _CCCL_EXEC_CHECK_DISABLE @@ -67,7 +69,8 @@ _CCCL_HOST_DEVICE InputIterator find_if_not( { _CCCL_NVTX_RANGE_SCOPE("thrust::find_if_not"); using thrust::system::detail::generic::find_if_not; - return find_if_not(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); + return find_if_not( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(pred)); } // end find_if_not() template @@ -93,7 +96,7 @@ InputIterator find_if(InputIterator first, InputIterator last, Predicate pred) System system; - return thrust::find_if(select_system(system), first, last, pred); + return thrust::find_if(select_system(system), first, last, ::cuda::std::move(pred)); } template @@ -106,7 +109,7 @@ InputIterator find_if_not(InputIterator first, InputIterator last, Predicate pre System system; - return thrust::find_if_not(select_system(system), first, last, pred); + return thrust::find_if_not(select_system(system), first, last, ::cuda::std::move(pred)); } THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/for_each.inl b/thrust/thrust/detail/for_each.inl index 292c623c2b9..da9f966a768 100644 --- a/thrust/thrust/detail/for_each.inl +++ b/thrust/thrust/detail/for_each.inl @@ -17,6 +17,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -44,7 +46,7 @@ _CCCL_HOST_DEVICE InputIterator for_each( _CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy(), "thrust::for_each"); using thrust::system::detail::generic::for_each; - return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f); + return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(f)); } template @@ -55,7 +57,7 @@ InputIterator for_each(InputIterator first, InputIterator last, UnaryFunction f) using thrust::system::detail::generic::select_system; System system; - return thrust::for_each(select_system(system), first, last, f); + return thrust::for_each(select_system(system), first, last, ::cuda::std::move(f)); } // end for_each() _CCCL_EXEC_CHECK_DISABLE @@ -66,7 +68,7 @@ _CCCL_HOST_DEVICE InputIterator for_each_n( _CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy(), "thrust::for_each_n"); using thrust::system::detail::generic::for_each_n; - return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, f); + return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, ::cuda::std::move(f)); } // end for_each_n() template @@ -77,7 +79,7 @@ InputIterator for_each_n(InputIterator first, Size n, UnaryFunction f) using thrust::system::detail::generic::select_system; System system; - return thrust::for_each_n(select_system(system), first, n, f); + return thrust::for_each_n(select_system(system), first, n, ::cuda::std::move(f)); } // end for_each_n() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/generate.inl b/thrust/thrust/detail/generate.inl index 8631b78969c..1cba09d50a1 100644 --- a/thrust/thrust/detail/generate.inl +++ b/thrust/thrust/detail/generate.inl @@ -18,6 +18,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -44,7 +46,7 @@ generate(const thrust::detail::execution_policy_base& exec, { _CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy(), "thrust::generate"); using thrust::system::detail::generic::generate; - return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, gen); + return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(gen)); } // end generate() _CCCL_EXEC_CHECK_DISABLE @@ -54,7 +56,7 @@ _CCCL_HOST_DEVICE OutputIterator generate_n( { _CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy(), "thrust::generate_n"); using thrust::system::detail::generic::generate_n; - return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, gen); + return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, ::cuda::std::move(gen)); } // end generate_n() template @@ -66,7 +68,7 @@ void generate(ForwardIterator first, ForwardIterator last, Generator gen) System system; - return thrust::generate(select_system(system), first, last, gen); + return thrust::generate(select_system(system), first, last, ::cuda::std::move(gen)); } // end generate() template @@ -78,7 +80,7 @@ OutputIterator generate_n(OutputIterator first, Size n, Generator gen) System system; - return thrust::generate_n(select_system(system), first, n, gen); + return thrust::generate_n(select_system(system), first, n, ::cuda::std::move(gen)); } // end generate_n() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/inner_product.inl b/thrust/thrust/detail/inner_product.inl index 5b17b38697d..81d9c61d9b8 100644 --- a/thrust/thrust/detail/inner_product.inl +++ b/thrust/thrust/detail/inner_product.inl @@ -21,6 +21,7 @@ #include #include __THRUST_HOST_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(inner_product.h) #include __THRUST_DEVICE_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(inner_product.h) +#include // Some build systems need a hint to know which files we could include #if 0 @@ -43,7 +44,8 @@ _CCCL_HOST_DEVICE OutputType inner_product( { _CCCL_NVTX_RANGE_SCOPE("thrust::inner_product"); using thrust::system::detail::generic::inner_product; - return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init); + return inner_product( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, ::cuda::std::move(init)); } // end inner_product() _CCCL_EXEC_CHECK_DISABLE @@ -69,9 +71,9 @@ _CCCL_HOST_DEVICE OutputType inner_product( first1, last1, first2, - init, - binary_op1, - binary_op2); + ::cuda::std::move(init), + ::cuda::std::move(binary_op1), + ::cuda::std::move(binary_op2)); } // end inner_product() template @@ -86,7 +88,7 @@ OutputType inner_product(InputIterator1 first1, InputIterator1 last1, InputItera System1 system1; System2 system2; - return thrust::inner_product(select_system(system1, system2), first1, last1, first2, init); + return thrust::inner_product(select_system(system1, system2), first1, last1, first2, ::cuda::std::move(init)); } // end inner_product() template #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -59,7 +61,7 @@ _CCCL_HOST_DEVICE T reduce( { _CCCL_NVTX_RANGE_SCOPE("thrust::reduce"); using thrust::system::detail::generic::reduce; - return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); + return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(init)); } // end reduce() _CCCL_EXEC_CHECK_DISABLE @@ -73,7 +75,11 @@ _CCCL_HOST_DEVICE T reduce( { _CCCL_NVTX_RANGE_SCOPE("thrust::reduce"); using thrust::system::detail::generic::reduce; - return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, binary_op); + return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end reduce() _CCCL_EXEC_CHECK_DISABLE @@ -98,7 +104,8 @@ _CCCL_HOST_DEVICE void reduce_into( T init) { using thrust::system::detail::generic::reduce_into; - reduce_into(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, init); + reduce_into( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, ::cuda::std::move(init)); } // end reduce_into() _CCCL_EXEC_CHECK_DISABLE @@ -112,7 +119,12 @@ _CCCL_HOST_DEVICE void reduce_into( BinaryFunction binary_op) { using thrust::system::detail::generic::reduce_into; - reduce_into(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, init, binary_op); + reduce_into(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + output, + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end reduce_into() _CCCL_EXEC_CHECK_DISABLE @@ -165,7 +177,7 @@ _CCCL_HOST_DEVICE ::cuda::std::pair reduce_by_ values_first, keys_output, values_output, - binary_pred); + ::cuda::std::move(binary_pred)); } // end reduce_by_key() _CCCL_EXEC_CHECK_DISABLE @@ -195,8 +207,8 @@ _CCCL_HOST_DEVICE ::cuda::std::pair reduce_by_ values_first, keys_output, values_output, - binary_pred, - binary_op); + ::cuda::std::move(binary_pred), + ::cuda::std::move(binary_op)); } // end reduce_by_key() template @@ -222,7 +234,7 @@ T reduce(InputIterator first, InputIterator last, T init) System system; - return thrust::reduce(select_system(system), first, last, init); + return thrust::reduce(select_system(system), first, last, ::cuda::std::move(init)); } template @@ -235,7 +247,7 @@ T reduce(InputIterator first, InputIterator last, T init, BinaryFunction binary_ System system; - return thrust::reduce(select_system(system), first, last, init, binary_op); + return thrust::reduce(select_system(system), first, last, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } template @@ -263,7 +275,7 @@ void reduce_into(InputIterator first, InputIterator last, OutputIterator output, System1 system1; System2 system2; - thrust::reduce_into(select_system(system1, system2), first, last, output, init); + thrust::reduce_into(select_system(system1, system2), first, last, output, ::cuda::std::move(init)); } template @@ -277,7 +289,8 @@ void reduce_into(InputIterator first, InputIterator last, OutputIterator output, System1 system1; System2 system2; - thrust::reduce_into(select_system(system1, system2), first, last, output, init, binary_op); + thrust::reduce_into( + select_system(system1, system2), first, last, output, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } template @@ -338,7 +351,7 @@ template #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -69,7 +71,8 @@ _CCCL_HOST_DEVICE ForwardIterator remove_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::remove_if"); using thrust::system::detail::generic::remove_if; - return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); + return remove_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(pred)); } // end remove_if() _CCCL_EXEC_CHECK_DISABLE @@ -83,7 +86,8 @@ _CCCL_HOST_DEVICE OutputIterator remove_copy_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::remove_copy_if"); using thrust::system::detail::generic::remove_copy_if; - return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); + return remove_copy_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, ::cuda::std::move(pred)); } // end remove_copy_if() _CCCL_EXEC_CHECK_DISABLE @@ -97,7 +101,8 @@ _CCCL_HOST_DEVICE ForwardIterator remove_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::remove_if"); using thrust::system::detail::generic::remove_if; - return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); + return remove_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, ::cuda::std::move(pred)); } // end remove_if() _CCCL_EXEC_CHECK_DISABLE @@ -117,7 +122,12 @@ _CCCL_HOST_DEVICE OutputIterator remove_copy_if( _CCCL_NVTX_RANGE_SCOPE("thrust::remove_copy_if"); using thrust::system::detail::generic::remove_copy_if; return remove_copy_if( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + stencil, + result, + ::cuda::std::move(pred)); } // end remove_copy_if() template @@ -158,7 +168,7 @@ ForwardIterator remove_if(ForwardIterator first, ForwardIterator last, Predicate System system; - return thrust::remove_if(select_system(system), first, last, pred); + return thrust::remove_if(select_system(system), first, last, ::cuda::std::move(pred)); } // end remove_if() template @@ -173,7 +183,7 @@ ForwardIterator remove_if(ForwardIterator first, ForwardIterator last, InputIter System1 system1; System2 system2; - return thrust::remove_if(select_system(system1, system2), first, last, stencil, pred); + return thrust::remove_if(select_system(system1, system2), first, last, stencil, ::cuda::std::move(pred)); } // end remove_if() template @@ -188,7 +198,7 @@ OutputIterator remove_copy_if(InputIterator first, InputIterator last, OutputIte System1 system1; System2 system2; - return thrust::remove_copy_if(select_system(system1, system2), first, last, result, pred); + return thrust::remove_copy_if(select_system(system1, system2), first, last, result, ::cuda::std::move(pred)); } // end remove_copy_if() template @@ -206,7 +216,8 @@ remove_copy_if(InputIterator1 first, InputIterator1 last, InputIterator2 stencil System2 system2; System3 system3; - return thrust::remove_copy_if(select_system(system1, system2, system3), first, last, stencil, result, pred); + return thrust::remove_copy_if( + select_system(system1, system2, system3), first, last, stencil, result, ::cuda::std::move(pred)); } // end remove_copy_if() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/replace.inl b/thrust/thrust/detail/replace.inl index 6bda9105d6e..4745f233e53 100644 --- a/thrust/thrust/detail/replace.inl +++ b/thrust/thrust/detail/replace.inl @@ -16,6 +16,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -57,7 +59,8 @@ _CCCL_HOST_DEVICE void replace_if( { _CCCL_NVTX_RANGE_SCOPE("thrust::replace_if"); using thrust::system::detail::generic::replace_if; - return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred, new_value); + return replace_if( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(pred), new_value); } // end replace_if() _CCCL_EXEC_CHECK_DISABLE @@ -73,7 +76,12 @@ _CCCL_HOST_DEVICE void replace_if( _CCCL_NVTX_RANGE_SCOPE("thrust::replace_if"); using thrust::system::detail::generic::replace_if; return replace_if( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred, new_value); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + stencil, + ::cuda::std::move(pred), + new_value); } // end replace_if() _CCCL_EXEC_CHECK_DISABLE @@ -105,7 +113,12 @@ _CCCL_HOST_DEVICE OutputIterator replace_copy_if( _CCCL_NVTX_RANGE_SCOPE("thrust::replace_copy_if"); using thrust::system::detail::generic::replace_copy_if; return replace_copy_if( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred, new_value); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + result, + ::cuda::std::move(pred), + new_value); } // end replace_copy_if() _CCCL_EXEC_CHECK_DISABLE @@ -127,7 +140,13 @@ _CCCL_HOST_DEVICE OutputIterator replace_copy_if( _CCCL_NVTX_RANGE_SCOPE("thrust::replace_copy_if"); using thrust::system::detail::generic::replace_copy_if; return replace_copy_if( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred, new_value); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + stencil, + result, + ::cuda::std::move(pred), + new_value); } // end replace_copy_if() template @@ -143,7 +162,8 @@ replace_copy_if(InputIterator first, InputIterator last, OutputIterator result, System1 system1; System2 system2; - return thrust::replace_copy_if(select_system(system1, system2), first, last, result, pred, new_value); + return thrust::replace_copy_if( + select_system(system1, system2), first, last, result, ::cuda::std::move(pred), new_value); } // end replace_copy_if() template @@ -167,7 +187,7 @@ OutputIterator replace_copy_if( System3 system3; return thrust::replace_copy_if( - select_system(system1, system2, system3), first, last, stencil, result, pred, new_value); + select_system(system1, system2, system3), first, last, stencil, result, ::cuda::std::move(pred), new_value); } // end replace_copy_if() template @@ -196,7 +216,7 @@ void replace_if(ForwardIterator first, ForwardIterator last, Predicate pred, con System system; - return thrust::replace_if(select_system(system), first, last, pred, new_value); + return thrust::replace_if(select_system(system), first, last, ::cuda::std::move(pred), new_value); } // end replace_if() template @@ -211,7 +231,7 @@ void replace_if(ForwardIterator first, ForwardIterator last, InputIterator stenc System1 system1; System2 system2; - return thrust::replace_if(select_system(system1, system2), first, last, stencil, pred, new_value); + return thrust::replace_if(select_system(system1, system2), first, last, stencil, ::cuda::std::move(pred), new_value); } // end replace_if() template diff --git a/thrust/thrust/detail/scan.inl b/thrust/thrust/detail/scan.inl index ff84c14742a..44cbc41aa40 100644 --- a/thrust/thrust/detail/scan.inl +++ b/thrust/thrust/detail/scan.inl @@ -25,6 +25,7 @@ #include __THRUST_DEVICE_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(scan.h) #include __THRUST_HOST_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(scan_by_key.h) #include __THRUST_DEVICE_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(scan_by_key.h) +#include // Some build systems need a hint to know which files we could include #if 0 @@ -64,7 +65,8 @@ _CCCL_HOST_DEVICE OutputIterator inclusive_scan( { _CCCL_NVTX_RANGE_SCOPE("thrust::inclusive_scan"); using thrust::system::detail::generic::inclusive_scan; - return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op); + return inclusive_scan( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, ::cuda::std::move(binary_op)); } // end inclusive_scan() _CCCL_EXEC_CHECK_DISABLE @@ -80,7 +82,12 @@ _CCCL_HOST_DEVICE OutputIterator inclusive_scan( _CCCL_NVTX_RANGE_SCOPE("thrust::inclusive_scan"); using thrust::system::detail::generic::inclusive_scan; return inclusive_scan( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init, binary_op); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + result, + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end inclusive_scan() _CCCL_EXEC_CHECK_DISABLE @@ -107,7 +114,8 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan( { _CCCL_NVTX_RANGE_SCOPE("thrust::exclusive_scan"); using thrust::system::detail::generic::exclusive_scan; - return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init); + return exclusive_scan( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, ::cuda::std::move(init)); } // end exclusive_scan() _CCCL_EXEC_CHECK_DISABLE @@ -123,7 +131,12 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan( _CCCL_NVTX_RANGE_SCOPE("thrust::exclusive_scan"); using thrust::system::detail::generic::exclusive_scan; return exclusive_scan( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init, binary_op); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + result, + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end exclusive_scan() _CCCL_EXEC_CHECK_DISABLE @@ -158,7 +171,12 @@ _CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key( _CCCL_NVTX_RANGE_SCOPE("thrust::inclusive_scan_by_key"); using thrust::system::detail::generic::inclusive_scan_by_key; return inclusive_scan_by_key( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first1, + last1, + first2, + result, + ::cuda::std::move(binary_pred)); } // end inclusive_scan_by_key() _CCCL_EXEC_CHECK_DISABLE @@ -185,8 +203,8 @@ _CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key( last1, first2, result, - binary_pred, - binary_op); + ::cuda::std::move(binary_pred), + ::cuda::std::move(binary_op)); } // end inclusive_scan_by_key() _CCCL_EXEC_CHECK_DISABLE @@ -217,7 +235,12 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key( _CCCL_NVTX_RANGE_SCOPE("thrust::exclusive_scan_by_key"); using thrust::system::detail::generic::exclusive_scan_by_key; return exclusive_scan_by_key( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first1, + last1, + first2, + result, + ::cuda::std::move(init)); } // end exclusive_scan_by_key() _CCCL_EXEC_CHECK_DISABLE @@ -239,7 +262,13 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key( _CCCL_NVTX_RANGE_SCOPE("thrust::exclusive_scan_by_key"); using thrust::system::detail::generic::exclusive_scan_by_key; return exclusive_scan_by_key( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first1, + last1, + first2, + result, + ::cuda::std::move(init), + ::cuda::std::move(binary_pred)); } // end exclusive_scan_by_key() _CCCL_EXEC_CHECK_DISABLE @@ -268,9 +297,9 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key( last1, first2, result, - init, - binary_pred, - binary_op); + ::cuda::std::move(init), + ::cuda::std::move(binary_pred), + ::cuda::std::move(binary_op)); } // end exclusive_scan_by_key() template @@ -300,7 +329,7 @@ OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIte System1 system1; System2 system2; - return thrust::inclusive_scan(select_system(system1, system2), first, last, result, binary_op); + return thrust::inclusive_scan(select_system(system1, system2), first, last, result, ::cuda::std::move(binary_op)); } // end inclusive_scan() template @@ -316,7 +345,8 @@ inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T System1 system1; System2 system2; - return thrust::inclusive_scan(select_system(system1, system2), first, last, result, init, binary_op); + return thrust::inclusive_scan( + select_system(system1, system2), first, last, result, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } // end inclusive_scan() template @@ -346,7 +376,7 @@ OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIte System1 system1; System2 system2; - return thrust::exclusive_scan(select_system(system1, system2), first, last, result, init); + return thrust::exclusive_scan(select_system(system1, system2), first, last, result, ::cuda::std::move(init)); } // end exclusive_scan() template @@ -362,7 +392,8 @@ exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T System1 system1; System2 system2; - return thrust::exclusive_scan(select_system(system1, system2), first, last, result, init, binary_op); + return thrust::exclusive_scan( + select_system(system1, system2), first, last, result, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } // end exclusive_scan() template @@ -399,7 +430,7 @@ OutputIterator inclusive_scan_by_key( System3 system3; return thrust::inclusive_scan_by_key( - select_system(system1, system2, system3), first1, last1, first2, result, binary_pred); + select_system(system1, system2, system3), first1, last1, first2, result, ::cuda::std::move(binary_pred)); } template @@ -463,7 +500,8 @@ exclusive_scan_by_key(InputIterator1 first1, InputIterator1 last1, InputIterator System2 system2; System3 system3; - return thrust::exclusive_scan_by_key(select_system(system1, system2, system3), first1, last1, first2, result, init); + return thrust::exclusive_scan_by_key( + select_system(system1, system2, system3), first1, last1, first2, result, ::cuda::std::move(init)); } template @@ -487,7 +525,13 @@ OutputIterator exclusive_scan_by_key( System3 system3; return thrust::exclusive_scan_by_key( - select_system(system1, system2, system3), first1, last1, first2, result, init, binary_pred); + select_system(system1, system2, system3), + first1, + last1, + first2, + result, + ::cuda::std::move(init), + ::cuda::std::move(binary_pred)); } template #include __THRUST_HOST_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(sequence.h) #include __THRUST_DEVICE_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(sequence.h) +#include // Some build systems need a hint to know which files we could include #if 0 @@ -49,7 +50,7 @@ _CCCL_HOST_DEVICE void sequence( { _CCCL_NVTX_RANGE_SCOPE("thrust::sequence"); using thrust::system::detail::generic::sequence; - return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); + return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(init)); } // end sequence() _CCCL_EXEC_CHECK_DISABLE @@ -63,7 +64,12 @@ _CCCL_HOST_DEVICE void sequence( { _CCCL_NVTX_RANGE_SCOPE("thrust::sequence"); using thrust::system::detail::generic::sequence; - return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, step); + return sequence( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + ::cuda::std::move(init), + ::cuda::std::move(step)); } // end sequence() template @@ -89,7 +95,7 @@ void sequence(ForwardIterator first, ForwardIterator last, T init) System system; - return thrust::sequence(select_system(system), first, last, init); + return thrust::sequence(select_system(system), first, last, ::cuda::std::move(init)); } // end sequence() template @@ -102,7 +108,7 @@ void sequence(ForwardIterator first, ForwardIterator last, T init, T step) System system; - return thrust::sequence(select_system(system), first, last, init, step); + return thrust::sequence(select_system(system), first, last, ::cuda::std::move(init), ::cuda::std::move(step)); } // end sequence() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/tabulate.inl b/thrust/thrust/detail/tabulate.inl index 403bdbb0484..0a28be52b3e 100644 --- a/thrust/thrust/detail/tabulate.inl +++ b/thrust/thrust/detail/tabulate.inl @@ -16,6 +16,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -42,7 +44,8 @@ tabulate(const thrust::detail::execution_policy_base& exec, { _CCCL_NVTX_RANGE_SCOPE("thrust::tabulate"); using thrust::system::detail::generic::tabulate; - return tabulate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op); + return tabulate( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, ::cuda::std::move(unary_op)); } // end tabulate() template @@ -55,7 +58,7 @@ void tabulate(ForwardIterator first, ForwardIterator last, UnaryOperation unary_ System system; - return thrust::tabulate(select_system(system), first, last, unary_op); + return thrust::tabulate(select_system(system), first, last, ::cuda::std::move(unary_op)); } // end tabulate() THRUST_NAMESPACE_END diff --git a/thrust/thrust/detail/transform_reduce.inl b/thrust/thrust/detail/transform_reduce.inl index 97e1e571d02..ce52e639544 100644 --- a/thrust/thrust/detail/transform_reduce.inl +++ b/thrust/thrust/detail/transform_reduce.inl @@ -15,6 +15,8 @@ #include #include +#include + // Include all active backend system implementations (generic, sequential, host and device) #include #include @@ -48,7 +50,12 @@ _CCCL_HOST_DEVICE OutputType transform_reduce( _CCCL_NVTX_RANGE_SCOPE("thrust::transform_reduce"); using thrust::system::detail::generic::transform_reduce; return transform_reduce( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op, init, binary_op); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + ::cuda::std::move(unary_op), + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end transform_reduce() template @@ -62,7 +69,13 @@ OutputType transform_reduce( System system; - return thrust::transform_reduce(select_system(system), first, last, unary_op, init, binary_op); + return thrust::transform_reduce( + select_system(system), + first, + last, + ::cuda::std::move(unary_op), + ::cuda::std::move(init), + ::cuda::std::move(binary_op)); } // end transform_reduce() THRUST_NAMESPACE_END diff --git a/thrust/thrust/iterator/constant_iterator.h b/thrust/thrust/iterator/constant_iterator.h index 9076b70e0c9..fd6b938ec63 100644 --- a/thrust/thrust/iterator/constant_iterator.h +++ b/thrust/thrust/iterator/constant_iterator.h @@ -20,6 +20,7 @@ #include #include +#include #include THRUST_NAMESPACE_BEGIN @@ -207,7 +208,7 @@ template CCCL_DEPRECATED_BECAUSE("Use cuda::make_constant_iterator instead") inline _CCCL_HOST_DEVICE constant_iterator make_constant_iterator(V x) { - return constant_iterator(x, 0); + return constant_iterator(::cuda::std::move(x), 0); } // end make_constant_iterator() //! \} // end fancyiterators diff --git a/thrust/thrust/iterator/transform_iterator.h b/thrust/thrust/iterator/transform_iterator.h index 78240ffcabf..4264f603fe2 100644 --- a/thrust/thrust/iterator/transform_iterator.h +++ b/thrust/thrust/iterator/transform_iterator.h @@ -41,6 +41,7 @@ #include #include #include +#include THRUST_NAMESPACE_BEGIN @@ -233,7 +234,7 @@ class transform_iterator //! \param f An \c AdaptableUnaryFunction used to transform the objects pointed to by \p x. _CCCL_HOST_DEVICE transform_iterator(Iterator const& x, AdaptableUnaryFunction f) : super_t(x) - , m_f(f) + , m_f(::cuda::std::move(f)) {} //! This explicit constructor copies the value of a given \c Iterator and creates this \p transform_iterator's \c diff --git a/thrust/thrust/iterator/zip_iterator.h b/thrust/thrust/iterator/zip_iterator.h index 11dee61b6a7..bddfe86c289 100644 --- a/thrust/thrust/iterator/zip_iterator.h +++ b/thrust/thrust/iterator/zip_iterator.h @@ -40,6 +40,7 @@ #include #include #include +#include #include THRUST_NAMESPACE_BEGIN @@ -189,7 +190,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES zip_iterator : public detail::make_zip_iterator //! //! \param iterator_tuple The \p tuple of iterators to copy from. inline _CCCL_HOST_DEVICE zip_iterator(IteratorTuple iterator_tuple) - : m_iterator_tuple(iterator_tuple) + : m_iterator_tuple(::cuda::std::move(iterator_tuple)) {} //! This constructor creates a new \p zip_iterator from multiple iterators. @@ -318,7 +319,7 @@ template inline _CCCL_HOST_DEVICE zip_iterator<::cuda::std::tuple> make_zip_iterator(::cuda::std::tuple t) { - return zip_iterator<::cuda::std::tuple>{t}; + return zip_iterator<::cuda::std::tuple>{::cuda::std::move(t)}; } //! \p make_zip_iterator creates a \p zip_iterator from diff --git a/thrust/thrust/system/cuda/detail/copy_if.h b/thrust/thrust/system/cuda/detail/copy_if.h index 65533cb08c6..944b61b362b 100644 --- a/thrust/thrust/system/cuda/detail/copy_if.h +++ b/thrust/thrust/system/cuda/detail/copy_if.h @@ -33,6 +33,7 @@ # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -135,7 +136,7 @@ struct DispatchCopyIf stencil, output, d_num_selected_out, - predicate, + ::cuda::std::move(predicate), equality_op_t{}, num_items, stream); @@ -188,7 +189,14 @@ THRUST_RUNTIME_FUNCTION OutputIt copy_if( // Run algorithm status = dispatch64_t::dispatch( - policy, temp_storage, temp_storage_bytes, first, stencil, output, predicate, static_cast(num_items)); + policy, + temp_storage, + temp_storage_bytes, + first, + stencil, + output, + ::cuda::std::move(predicate), + static_cast(num_items)); cuda_cub::throw_on_error(status, "copy_if failed on 2nd step"); return output; @@ -203,9 +211,10 @@ template & policy, InputIterator first, InputIterator last, OutputIterator result, Predicate pred) { - THRUST_CDP_DISPATCH((return detail::copy_if( - policy, first, last, static_cast(nullptr), result, pred);), - (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, result, pred);)); + THRUST_CDP_DISPATCH( + (return detail::copy_if( + policy, first, last, static_cast(nullptr), result, ::cuda::std::move(pred));), + (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, result, pred);)); } _CCCL_EXEC_CHECK_DISABLE @@ -218,8 +227,9 @@ OutputIterator _CCCL_HOST_DEVICE copy_if( OutputIterator result, Predicate pred) { - THRUST_CDP_DISPATCH((return detail::copy_if(policy, first, last, stencil, result, pred);), - (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, stencil, result, pred);)); + THRUST_CDP_DISPATCH( + (return detail::copy_if(policy, first, last, stencil, result, ::cuda::std::move(pred));), + (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, stencil, result, pred);)); } } // namespace cuda_cub THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/cuda/detail/core/agent_launcher.h b/thrust/thrust/system/cuda/detail/core/agent_launcher.h index d96a8fd56f7..fdf1239a970 100644 --- a/thrust/thrust/system/cuda/detail/core/agent_launcher.h +++ b/thrust/thrust/system/cuda/detail/core/agent_launcher.h @@ -17,6 +17,7 @@ # include # include +# include # include # include @@ -83,7 +84,7 @@ struct AgentLauncher : Agent template THRUST_RUNTIME_FUNCTION AgentLauncher(AgentPlan plan_, Size count_, cudaStream_t stream_, char const* name_) - : plan(plan_) + : plan(::cuda::std::move(plan_)) , count((size_t) count_) , stream(stream_) , name(name_) @@ -98,7 +99,7 @@ struct AgentLauncher : Agent template THRUST_RUNTIME_FUNCTION AgentLauncher(AgentPlan plan_, Size count_, cudaStream_t stream_, char* vshmem, char const* name_) - : plan(plan_) + : plan(::cuda::std::move(plan_)) , count((size_t) count_) , stream(stream_) , name(name_) @@ -111,7 +112,7 @@ struct AgentLauncher : Agent } THRUST_RUNTIME_FUNCTION AgentLauncher(AgentPlan plan_, cudaStream_t stream_, char const* name_) - : plan(plan_) + : plan(::cuda::std::move(plan_)) , count(0) , stream(stream_) , name(name_) @@ -124,7 +125,7 @@ struct AgentLauncher : Agent } THRUST_RUNTIME_FUNCTION AgentLauncher(AgentPlan plan_, cudaStream_t stream_, char* vshmem, char const* name_) - : plan(plan_) + : plan(::cuda::std::move(plan_)) , count(0) , stream(stream_) , name(name_) @@ -205,7 +206,7 @@ struct AgentLauncher : Agent } template - static cuda_optional THRUST_RUNTIME_FUNCTION get_max_blocks_per_sm(AgentPlan plan) + static cuda_optional THRUST_RUNTIME_FUNCTION get_max_blocks_per_sm(const AgentPlan& plan) { return max_blocks_per_sm_impl(_kernel_agent, plan.block_threads); } diff --git a/thrust/thrust/system/cuda/detail/core/util.h b/thrust/thrust/system/cuda/detail/core/util.h index 6aa87de0fc0..f30ab16d778 100644 --- a/thrust/thrust/system/cuda/detail/core/util.h +++ b/thrust/thrust/system/cuda/detail/core/util.h @@ -227,13 +227,10 @@ struct AgentPlan , grid_size(grid_size_) {} - THRUST_RUNTIME_FUNCTION AgentPlan(AgentPlan const& plan) - : block_threads(plan.block_threads) - , items_per_thread(plan.items_per_thread) - , items_per_tile(plan.items_per_tile) - , shared_memory_size(plan.shared_memory_size) - , grid_size(plan.grid_size) - {} + constexpr AgentPlan(AgentPlan const&) = default; + constexpr AgentPlan& operator=(AgentPlan const&) = default; + constexpr AgentPlan(AgentPlan&&) noexcept = default; + constexpr AgentPlan& operator=(AgentPlan&&) noexcept = default; template THRUST_RUNTIME_FUNCTION diff --git a/thrust/thrust/system/cuda/detail/find.h b/thrust/thrust/system/cuda/detail/find.h index 90952c0cca2..f1819a151a6 100644 --- a/thrust/thrust/system/cuda/detail/find.h +++ b/thrust/thrust/system/cuda/detail/find.h @@ -26,6 +26,7 @@ # include # include +# include THRUST_NAMESPACE_BEGIN namespace cuda_cub @@ -96,7 +97,7 @@ find_if_n(execution_policy& policy, InputIt first, Size num_items, Pred Size result_idx = num_items; THRUST_CDP_DISPATCH( - (result_idx = cuda_cub::detail::find_if_n_impl(policy, first, num_items, predicate);), + (result_idx = cuda_cub::detail::find_if_n_impl(policy, first, num_items, ::cuda::std::move(predicate));), (result_idx = thrust::find_if(cvt_to_seq(derived_cast(policy)), first, first + num_items, predicate) - first;)); return first + result_idx; @@ -105,14 +106,14 @@ find_if_n(execution_policy& policy, InputIt first, Size num_items, Pred template InputIt _CCCL_HOST_DEVICE find_if(execution_policy& policy, InputIt first, InputIt last, Predicate predicate) { - return cuda_cub::find_if_n(policy, first, ::cuda::std::distance(first, last), predicate); + return cuda_cub::find_if_n(policy, first, ::cuda::std::distance(first, last), ::cuda::std::move(predicate)); } template InputIt _CCCL_HOST_DEVICE find_if_not(execution_policy& policy, InputIt first, InputIt last, Predicate predicate) { - return cuda_cub::find_if(policy, first, last, ::cuda::std::not_fn(predicate)); + return cuda_cub::find_if(policy, first, last, ::cuda::std::not_fn(::cuda::std::move(predicate))); } template diff --git a/thrust/thrust/system/cuda/detail/for_each.h b/thrust/thrust/system/cuda/detail/for_each.h index 6ca50350d17..1829f792e0e 100644 --- a/thrust/thrust/system/cuda/detail/for_each.h +++ b/thrust/thrust/system/cuda/detail/for_each.h @@ -24,6 +24,7 @@ # include # include +# include THRUST_NAMESPACE_BEGIN @@ -52,7 +53,7 @@ Input _CCCL_API _CCCL_FORCEINLINE for_each(execution_policy& policy, In using size_type = thrust::detail::it_difference_t; size_type count = static_cast(::cuda::std::distance(first, last)); - return THRUST_NS_QUALIFIER::cuda_cub::for_each_n(policy, first, count, op); + return THRUST_NS_QUALIFIER::cuda_cub::for_each_n(policy, first, count, ::cuda::std::move(op)); } } // namespace cuda_cub diff --git a/thrust/thrust/system/cuda/detail/generate.h b/thrust/thrust/system/cuda/detail/generate.h index 16295bd4245..42fc61cc162 100644 --- a/thrust/thrust/system/cuda/detail/generate.h +++ b/thrust/thrust/system/cuda/detail/generate.h @@ -19,6 +19,7 @@ # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -35,7 +36,7 @@ generate_n(execution_policy& policy, OutputIt result, Size count, Gener status, (CUB_NS_QUALIFIER::DeviceTransform::Generate), count, - (result, count_fixed, generator, cuda_cub::stream(policy))); + (result, count_fixed, ::cuda::std::move(generator), cuda_cub::stream(policy))); throw_on_error(status, "generate_n: failed inside CUB"); throw_on_error(synchronize_optional(policy), "generate_n: failed to synchronize"); return result + count; @@ -46,7 +47,7 @@ generate_n(execution_policy& policy, OutputIt result, Size count, Gener template void _CCCL_HOST_DEVICE generate(execution_policy& policy, OutputIt first, OutputIt last, Generator generator) { - cuda_cub::generate_n(policy, first, ::cuda::std::distance(first, last), generator); + cuda_cub::generate_n(policy, first, ::cuda::std::distance(first, last), ::cuda::std::move(generator)); } } // namespace cuda_cub THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/cuda/detail/inner_product.h b/thrust/thrust/system/cuda/detail/inner_product.h index d0c8b601753..c96b7c4786c 100644 --- a/thrust/thrust/system/cuda/detail/inner_product.h +++ b/thrust/thrust/system/cuda/detail/inner_product.h @@ -21,6 +21,7 @@ # include # include +# include THRUST_NAMESPACE_BEGIN @@ -36,10 +37,10 @@ T _CCCL_HOST_DEVICE inner_product( ReduceOp reduce_op, ProductOp product_op) { - const auto n = ::cuda::std::distance(first1, last1); - const auto first = - thrust::make_transform_iterator(thrust::make_zip_iterator(first1, first2), thrust::make_zip_function(product_op)); - return cuda_cub::reduce_n(policy, first, n, init, reduce_op); + const auto n = ::cuda::std::distance(first1, last1); + const auto first = thrust::make_transform_iterator( + thrust::make_zip_iterator(first1, first2), thrust::make_zip_function(::cuda::std::move(product_op))); + return cuda_cub::reduce_n(policy, first, n, ::cuda::std::move(init), ::cuda::std::move(reduce_op)); } template @@ -47,7 +48,7 @@ T _CCCL_HOST_DEVICE inner_product(execution_policy& policy, InputIt1 first1, InputIt1 last1, InputIt2 first2, T init) { return cuda_cub::inner_product( - policy, first1, last1, first2, init, ::cuda::std::plus(), ::cuda::std::multiplies()); + policy, first1, last1, first2, ::cuda::std::move(init), ::cuda::std::plus(), ::cuda::std::multiplies()); } } // namespace cuda_cub diff --git a/thrust/thrust/system/cuda/detail/reduce.h b/thrust/thrust/system/cuda/detail/reduce.h index b8d326e27ca..93b7590f043 100644 --- a/thrust/thrust/system/cuda/detail/reduce.h +++ b/thrust/thrust/system/cuda/detail/reduce.h @@ -41,6 +41,7 @@ # include # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -592,7 +593,7 @@ THRUST_RUNTIME_FUNCTION size_t get_reduce_n_temporary_storage_size( status, cub::DeviceReduce::Reduce, num_items, - (nullptr, tmp_size, first, static_cast(nullptr), num_items_fixed, binary_op, init, stream)); + (nullptr, tmp_size, first, static_cast(nullptr), num_items_fixed, binary_op, ::cuda::std::move(init), stream)); cuda_cub::throw_on_error(status, "after determining reduce temporary storage size"); return tmp_size; @@ -660,7 +661,7 @@ THRUST_RUNTIME_FUNCTION void reduce_n_into_impl( status, cub::DeviceReduce::Reduce, num_items, - (tmp_ptr, tmp_size, first, output, num_items_fixed, binary_op, init, stream)); + (tmp_ptr, tmp_size, first, output, num_items_fixed, binary_op, ::cuda::std::move(init), stream)); cuda_cub::throw_on_error(status, "after reduce invocation"); // Synchronize the stream and get the value. @@ -680,7 +681,7 @@ _CCCL_HOST_DEVICE T reduce_n(execution_policy& policy, InputIt first, Size num_items, T init, BinaryOp binary_op) { THRUST_CDP_DISPATCH( - (init = thrust::cuda_cub::detail::reduce_n_impl(policy, first, num_items, init, binary_op);), + (init = thrust::cuda_cub::detail::reduce_n_impl(policy, first, num_items, init, ::cuda::std::move(binary_op));), (init = thrust::reduce(cvt_to_seq(derived_cast(policy)), first, first + num_items, init, binary_op);)); return init; } @@ -691,7 +692,8 @@ _CCCL_HOST_DEVICE void reduce_n_into( execution_policy& policy, InputIt first, Size num_items, OutputIt output, T init, BinaryOp binary_op) { THRUST_CDP_DISPATCH( - (thrust::cuda_cub::detail::reduce_n_into_impl(policy, first, num_items, output, init, binary_op);), + (thrust::cuda_cub::detail::reduce_n_into_impl( + policy, first, num_items, output, ::cuda::std::move(init), ::cuda::std::move(binary_op));), (thrust::reduce_into(cvt_to_seq(derived_cast(policy)), first, first + num_items, output, init, binary_op);)); } @@ -701,13 +703,13 @@ _CCCL_HOST_DEVICE T reduce(execution_policy& policy, InputIt first, Inp using size_type = thrust::detail::it_difference_t; // FIXME: Check for RA iterator. size_type num_items = static_cast(::cuda::std::distance(first, last)); - return cuda_cub::reduce_n(policy, first, num_items, init, binary_op); + return cuda_cub::reduce_n(policy, first, num_items, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } template _CCCL_HOST_DEVICE T reduce(execution_policy& policy, InputIt first, InputIt last, T init) { - return cuda_cub::reduce(policy, first, last, init, ::cuda::std::plus()); + return cuda_cub::reduce(policy, first, last, ::cuda::std::move(init), ::cuda::std::plus()); } template @@ -725,14 +727,14 @@ reduce_into(execution_policy& policy, InputIt first, InputIt last, Outp using size_type = thrust::detail::it_difference_t; // FIXME: Check for RA iterator. size_type num_items = static_cast(::cuda::std::distance(first, last)); - cuda_cub::reduce_n_into(policy, first, num_items, output, init, binary_op); + cuda_cub::reduce_n_into(policy, first, num_items, output, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } template _CCCL_HOST_DEVICE void reduce_into(execution_policy& policy, InputIt first, InputIt last, OutputIt output, T init) { - cuda_cub::reduce_into(policy, first, last, output, init, ::cuda::std::plus()); + cuda_cub::reduce_into(policy, first, last, output, ::cuda::std::move(init), ::cuda::std::plus()); } template diff --git a/thrust/thrust/system/cuda/detail/remove.h b/thrust/thrust/system/cuda/detail/remove.h index 513a49e31e8..22aae57948d 100644 --- a/thrust/thrust/system/cuda/detail/remove.h +++ b/thrust/thrust/system/cuda/detail/remove.h @@ -17,6 +17,7 @@ # include # include +# include THRUST_NAMESPACE_BEGIN namespace cuda_cub @@ -39,7 +40,12 @@ InputIt _CCCL_HOST_DEVICE remove_if(execution_policy& policy, InputIt f { THRUST_CDP_DISPATCH( (return cuda_cub::detail::copy_if( - policy, first, last, static_cast(nullptr), first, ::cuda::std::not_fn(predicate));), + policy, + first, + last, + static_cast(nullptr), + first, + ::cuda::std::not_fn(::cuda::std::move(predicate)));), (return thrust::remove_if(cvt_to_seq(derived_cast(policy)), first, last, predicate);)); } @@ -69,7 +75,7 @@ template OutputIt _CCCL_HOST_DEVICE remove_copy_if(execution_policy& policy, InputIt first, InputIt last, OutputIt result, Predicate predicate) { - return cuda_cub::copy_if(policy, first, last, result, ::cuda::std::not_fn(predicate)); + return cuda_cub::copy_if(policy, first, last, result, ::cuda::std::not_fn(::cuda::std::move(predicate))); } template @@ -77,7 +83,7 @@ OutputIt _CCCL_HOST_DEVICE remove_copy(execution_policy& policy, InputIt first, InputIt last, OutputIt result, const T& value) { ::cuda::equal_to_value pred{value}; - return cuda_cub::remove_copy_if(policy, first, last, result, pred); + return cuda_cub::remove_copy_if(policy, first, last, result, ::cuda::std::move(pred)); } } // namespace cuda_cub THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/cuda/detail/replace.h b/thrust/thrust/system/cuda/detail/replace.h index 79160764729..0a4ab988d54 100644 --- a/thrust/thrust/system/cuda/detail/replace.h +++ b/thrust/thrust/system/cuda/detail/replace.h @@ -18,6 +18,7 @@ # include # include +# include THRUST_NAMESPACE_BEGIN namespace cuda_cub @@ -66,7 +67,7 @@ OutputIt _CCCL_HOST_DEVICE replace_copy_if( { using output_type = thrust::detail::it_value_t; using new_value_if_t = thrust::detail::new_value_if_f; - return cuda_cub::transform(policy, first, last, result, new_value_if_t{predicate, new_value}); + return cuda_cub::transform(policy, first, last, result, new_value_if_t{::cuda::std::move(predicate), new_value}); } template diff --git a/thrust/thrust/system/cuda/detail/scan.h b/thrust/thrust/system/cuda/detail/scan.h index 903b211750f..f3e129409d8 100644 --- a/thrust/thrust/system/cuda/detail/scan.h +++ b/thrust/thrust/system/cuda/detail/scan.h @@ -28,6 +28,7 @@ # include # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -200,7 +201,8 @@ _CCCL_HOST_DEVICE OutputIt inclusive_scan_n( ScanOp scan_op) { THRUST_CDP_DISPATCH( - (result = thrust::cuda_cub::detail::inclusive_scan_n_impl(policy, first, num_items, result, init, scan_op);), + (result = thrust::cuda_cub::detail::inclusive_scan_n_impl( + policy, first, num_items, result, ::cuda::std::move(init), ::cuda::std::move(scan_op));), (result = thrust::inclusive_scan(cvt_to_seq(derived_cast(policy)), first, first + num_items, result, init, scan_op);)); return result; @@ -211,7 +213,8 @@ _CCCL_HOST_DEVICE OutputIt inclusive_scan_n( thrust::cuda_cub::execution_policy& policy, InputIt first, Size num_items, OutputIt result, ScanOp scan_op) { THRUST_CDP_DISPATCH( - (result = thrust::cuda_cub::detail::inclusive_scan_n_impl(policy, first, num_items, result, scan_op);), + (result = + thrust::cuda_cub::detail::inclusive_scan_n_impl(policy, first, num_items, result, ::cuda::std::move(scan_op));), (result = thrust::inclusive_scan(cvt_to_seq(derived_cast(policy)), first, first + num_items, result, scan_op);)); return result; } @@ -222,7 +225,7 @@ _CCCL_HOST_DEVICE OutputIt inclusive_scan( { using diff_t = thrust::detail::it_difference_t; diff_t const num_items = ::cuda::std::distance(first, last); - return thrust::cuda_cub::inclusive_scan_n(policy, first, num_items, result, scan_op); + return thrust::cuda_cub::inclusive_scan_n(policy, first, num_items, result, ::cuda::std::move(scan_op)); } template @@ -236,7 +239,8 @@ _CCCL_HOST_DEVICE OutputIt inclusive_scan( { using diff_t = thrust::detail::it_difference_t; diff_t const num_items = ::cuda::std::distance(first, last); - return thrust::cuda_cub::inclusive_scan_n(policy, first, num_items, result, init, scan_op); + return thrust::cuda_cub::inclusive_scan_n( + policy, first, num_items, result, ::cuda::std::move(init), ::cuda::std::move(scan_op)); } template @@ -257,7 +261,8 @@ _CCCL_HOST_DEVICE OutputIt exclusive_scan_n( ScanOp scan_op) { THRUST_CDP_DISPATCH( - (result = thrust::cuda_cub::detail::exclusive_scan_n_impl(policy, first, num_items, result, init, scan_op);), + (result = thrust::cuda_cub::detail::exclusive_scan_n_impl( + policy, first, num_items, result, ::cuda::std::move(init), ::cuda::std::move(scan_op));), (result = thrust::exclusive_scan(cvt_to_seq(derived_cast(policy)), first, first + num_items, result, init, scan_op);)); return result; @@ -274,14 +279,15 @@ _CCCL_HOST_DEVICE OutputIt exclusive_scan( { using diff_t = thrust::detail::it_difference_t; diff_t const num_items = ::cuda::std::distance(first, last); - return thrust::cuda_cub::exclusive_scan_n(policy, first, num_items, result, init, scan_op); + return thrust::cuda_cub::exclusive_scan_n( + policy, first, num_items, result, ::cuda::std::move(init), ::cuda::std::move(scan_op)); } template _CCCL_HOST_DEVICE OutputIt exclusive_scan( thrust::cuda_cub::execution_policy& policy, InputIt first, InputIt last, OutputIt result, T init) { - return thrust::cuda_cub::exclusive_scan(policy, first, last, result, init, ::cuda::std::plus<>{}); + return thrust::cuda_cub::exclusive_scan(policy, first, last, result, ::cuda::std::move(init), ::cuda::std::plus<>{}); } template diff --git a/thrust/thrust/system/cuda/detail/scan_by_key.h b/thrust/thrust/system/cuda/detail/scan_by_key.h index c7a4bf9265a..001ed743134 100644 --- a/thrust/thrust/system/cuda/detail/scan_by_key.h +++ b/thrust/thrust/system/cuda/detail/scan_by_key.h @@ -32,6 +32,7 @@ # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -335,9 +336,9 @@ ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key( value_first, value_result, ::cuda::std::distance(key_first, key_last), - init, - binary_pred, - scan_op);), + ::cuda::std::move(init), + ::cuda::std::move(binary_pred), + ::cuda::std::move(scan_op));), (ret = thrust::exclusive_scan_by_key( cvt_to_seq(derived_cast(policy)), key_first, key_last, value_first, value_result, init, binary_pred, scan_op);)); return ret; @@ -354,7 +355,7 @@ ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key( BinaryPred binary_pred) { return cuda_cub::exclusive_scan_by_key( - policy, key_first, key_last, value_first, value_result, init, binary_pred, ::cuda::std::plus<>()); + policy, key_first, key_last, value_first, value_result, ::cuda::std::move(init), binary_pred, ::cuda::std::plus<>()); } template @@ -367,7 +368,7 @@ ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key( Init init) { return cuda_cub::exclusive_scan_by_key( - policy, key_first, key_last, value_first, value_result, init, ::cuda::std::equal_to<>()); + policy, key_first, key_last, value_first, value_result, ::cuda::std::move(init), ::cuda::std::equal_to<>()); } template diff --git a/thrust/thrust/system/cuda/detail/tabulate.h b/thrust/thrust/system/cuda/detail/tabulate.h index 7fd9ba26093..02a899095eb 100644 --- a/thrust/thrust/system/cuda/detail/tabulate.h +++ b/thrust/thrust/system/cuda/detail/tabulate.h @@ -23,6 +23,7 @@ # include # include # include +# include THRUST_NAMESPACE_BEGIN namespace cuda_cub @@ -33,7 +34,11 @@ void _CCCL_HOST_DEVICE tabulate(execution_policy& policy, Iterator firs using size_type = ::cuda::std::iter_difference_t; const auto count = ::cuda::std::distance(first, last); cuda_cub::transform_n( - policy, ::cuda::counting_iterator{}, count, first, ::cuda::proclaim_copyable_arguments(tabulate_op)); + policy, + ::cuda::counting_iterator{}, + count, + first, + ::cuda::proclaim_copyable_arguments(::cuda::std::move(tabulate_op))); } } // namespace cuda_cub THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/cuda/detail/transform.h b/thrust/thrust/system/cuda/detail/transform.h index 9a432e56ab9..4f163c0baf9 100644 --- a/thrust/thrust/system/cuda/detail/transform.h +++ b/thrust/thrust/system/cuda/detail/transform.h @@ -29,6 +29,7 @@ # include # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -74,7 +75,10 @@ OutputIt _CCCL_HOST_DEVICE unary_if_with_stencil( } using unary_transform_t = unary_transform_f; - cuda_cub::parallel_for(policy, unary_transform_t{items, result, stencil, transform_op, predicate}, num_items); + cuda_cub::parallel_for( + policy, + unary_transform_t{items, result, stencil, ::cuda::std::move(transform_op), ::cuda::std::move(predicate)}, + num_items); return result + num_items; } @@ -120,7 +124,9 @@ OutputIt _CCCL_HOST_DEVICE binary_if_with_stencil( { using binary_transform_t = binary_transform_f; cuda_cub::parallel_for( - policy, binary_transform_t{items1, items2, result, stencil, transform_op, predicate}, num_items); + policy, + binary_transform_t{items1, items2, result, stencil, ::cuda::std::move(transform_op), ::cuda::std::move(predicate)}, + num_items); return result + num_items; } @@ -216,7 +222,11 @@ transform(execution_policy& policy, InputIt first, InputIt last, Output { THRUST_CDP_DISPATCH( (return __transform::cub_transform_many( - policy, ::cuda::std::make_tuple(first), result, ::cuda::std::distance(first, last), transform_op);), + policy, + ::cuda::std::make_tuple(first), + result, + ::cuda::std::distance(first, last), + ::cuda::std::move(transform_op));), (return ::cuda::std::transform( first, last, result, __transform::raw_reference_cast_args{transform_op});)); } @@ -230,7 +240,8 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_n( TransformOp transform_op) { THRUST_CDP_DISPATCH( - (return __transform::cub_transform_many(policy, ::cuda::std::make_tuple(first), result, num_items, transform_op);), + (return __transform::cub_transform_many( + policy, ::cuda::std::make_tuple(first), result, num_items, ::cuda::std::move(transform_op));), (return ::cuda::std::transform( first, first + num_items, result, __transform::raw_reference_cast_args{transform_op});)); } @@ -250,8 +261,8 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_if( ::cuda::std::make_tuple(first), result, ::cuda::std::distance(first, last), - transform_op, - predicate);), + ::cuda::std::move(transform_op), + ::cuda::std::move(predicate));), (while (first != last) { if (predicate(raw_reference_cast(*first))) { @@ -271,16 +282,22 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_if_n( TransformOp transform_op, Predicate predicate) { - THRUST_CDP_DISPATCH((return __transform::cub_transform_many( - policy, ::cuda::std::make_tuple(first), result, num_items, transform_op, predicate);), - (for (decltype(num_items) i = 0; i < num_items; i++) { - if (predicate(raw_reference_cast(*first))) - { - *result = transform_op(raw_reference_cast(*first)); - } - ++first; - ++result; - } return result;)); + THRUST_CDP_DISPATCH( + (return __transform::cub_transform_many( + policy, + ::cuda::std::make_tuple(first), + result, + num_items, + ::cuda::std::move(transform_op), + ::cuda::std::move(predicate));), + (for (decltype(num_items) i = 0; i < num_items; i++) { + if (predicate(raw_reference_cast(*first))) + { + *result = transform_op(raw_reference_cast(*first)); + } + ++first; + ++result; + } return result;)); } // one input data stream + stencil @@ -296,7 +313,13 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_if( Predicate predicate) { return __transform::unary_if_with_stencil( - policy, first, result, ::cuda::std::distance(first, last), stencil, transform_op, predicate); + policy, + first, + result, + ::cuda::std::distance(first, last), + stencil, + ::cuda::std::move(transform_op), + ::cuda::std::move(predicate)); } template {transform_op});)); } @@ -350,7 +374,7 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_n( { THRUST_CDP_DISPATCH( (return __transform::cub_transform_many( - policy, ::cuda::std::make_tuple(first1, first2), result, num_items, transform_op);), + policy, ::cuda::std::make_tuple(first1, first2), result, num_items, ::cuda::std::move(transform_op));), (return ::cuda::std::transform(first1, first1 + num_items, first2, @@ -378,7 +402,14 @@ _CCCL_API _CCCL_FORCEINLINE OutputIt transform_if( Predicate predicate) { return __transform::binary_if_with_stencil( - policy, first1, first2, result, ::cuda::std::distance(first1, last1), stencil, transform_op, predicate); + policy, + first1, + first2, + result, + ::cuda::std::distance(first1, last1), + stencil, + ::cuda::std::move(transform_op), + ::cuda::std::move(predicate)); } template #include +#include #include // Contributed by Erich Elsen @@ -84,8 +85,8 @@ find_if(thrust::execution_policy& exec, InputIterator first, Inpu using IteratorTuple = ::cuda::std::tuple>; using ZipIterator = thrust::zip_iterator; - IteratorTuple iter_tuple = - ::cuda::std::make_tuple(XfrmIterator(first, pred), thrust::counting_iterator(0)); + IteratorTuple iter_tuple = ::cuda::std::make_tuple( + XfrmIterator(first, ::cuda::std::move(pred)), thrust::counting_iterator(0)); ZipIterator begin = thrust::make_zip_iterator(iter_tuple); ZipIterator end = begin + n; @@ -116,7 +117,7 @@ template _CCCL_HOST_DEVICE InputIterator find_if_not(thrust::execution_policy& exec, InputIterator first, InputIterator last, Predicate pred) { - return thrust::find_if(exec, first, last, ::cuda::std::not_fn(pred)); + return thrust::find_if(exec, first, last, ::cuda::std::not_fn(::cuda::std::move(pred))); } // end find() } // namespace system::detail::generic THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/detail/generic/inner_product.inl b/thrust/thrust/system/detail/generic/inner_product.inl index 7d3cae8e2c3..6cd79802acd 100644 --- a/thrust/thrust/system/detail/generic/inner_product.inl +++ b/thrust/thrust/system/detail/generic/inner_product.inl @@ -18,6 +18,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::generic { @@ -31,7 +33,7 @@ _CCCL_HOST_DEVICE OutputType inner_product( { ::cuda::std::plus binary_op1; ::cuda::std::multiplies binary_op2; - return thrust::inner_product(exec, first1, last1, first2, init, binary_op1, binary_op2); + return thrust::inner_product(exec, first1, last1, first2, ::cuda::std::move(init), binary_op1, binary_op2); } // end inner_product() template #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::generic { @@ -37,7 +39,7 @@ _CCCL_HOST_DEVICE T reduce(thrust::execution_policy& exec, InputIterator first, InputIterator last, T init) { // use plus by default - return thrust::reduce(exec, first, last, init, ::cuda::std::plus()); + return thrust::reduce(exec, first, last, ::cuda::std::move(init), ::cuda::std::plus()); } // end reduce() template @@ -66,7 +68,7 @@ _CCCL_HOST_DEVICE void reduce_into( T init) { // use plus by default - thrust::reduce_into(exec, first, last, output, init, ::cuda::std::plus()); + thrust::reduce_into(exec, first, last, output, ::cuda::std::move(init), ::cuda::std::plus()); } // end reduce_into() template @@ -79,7 +81,7 @@ _CCCL_HOST_DEVICE void reduce_into( BinaryFunction binary_op) { // use reduce by default - *output = thrust::reduce(exec, first, last, init, binary_op); + *output = thrust::reduce(exec, first, last, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } // end reduce_into() } // namespace system::detail::generic THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/detail/generic/remove.inl b/thrust/thrust/system/detail/generic/remove.inl index 36a5d693191..9ba0ca966bf 100644 --- a/thrust/thrust/system/detail/generic/remove.inl +++ b/thrust/thrust/system/detail/generic/remove.inl @@ -20,6 +20,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace system::detail::generic @@ -58,7 +59,7 @@ remove_if(thrust::execution_policy& exec, ForwardIterator first, thrust::detail::temporary_array temp(exec, first, last); // remove into temp - return thrust::remove_copy_if(exec, temp.begin(), temp.end(), temp.begin(), first, pred); + return thrust::remove_copy_if(exec, temp.begin(), temp.end(), temp.begin(), first, ::cuda::std::move(pred)); } // end remove_if() template @@ -75,7 +76,7 @@ _CCCL_HOST_DEVICE ForwardIterator remove_if( thrust::detail::temporary_array temp(exec, first, last); // remove into temp - return thrust::remove_copy_if(exec, temp.begin(), temp.end(), stencil, first, pred); + return thrust::remove_copy_if(exec, temp.begin(), temp.end(), stencil, first, ::cuda::std::move(pred)); } // end remove_if() template @@ -86,7 +87,7 @@ _CCCL_HOST_DEVICE OutputIterator remove_copy_if( OutputIterator result, Predicate pred) { - return thrust::remove_copy_if(exec, first, last, first, result, pred); + return thrust::remove_copy_if(exec, first, last, first, result, ::cuda::std::move(pred)); } // end remove_copy_if() template #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::generic { @@ -28,8 +30,8 @@ template struct new_value_if { _CCCL_HOST_DEVICE new_value_if(Predicate p, NewType nv) - : pred(p) - , new_value(nv) + : pred(::cuda::std::move(p)) + , new_value(::cuda::std::move(nv)) {} template @@ -55,7 +57,7 @@ template struct constant_unary { _CCCL_HOST_DEVICE constant_unary(T _c) - : c(_c) + : c(::cuda::std::move(_c)) {} template @@ -79,7 +81,7 @@ _CCCL_HOST_DEVICE OutputIterator replace_copy_if( { using OutputType = thrust::detail::it_value_t; - detail::new_value_if op(pred, new_value); + detail::new_value_if op(::cuda::std::move(pred), new_value); return thrust::transform(exec, first, last, result, op); } // end replace_copy_if() @@ -100,7 +102,7 @@ _CCCL_HOST_DEVICE OutputIterator replace_copy_if( { using OutputType = thrust::detail::it_value_t; - detail::new_value_if op(pred, new_value); + detail::new_value_if op(::cuda::std::move(pred), new_value); return thrust::transform(exec, first, last, stencil, result, op); } // end replace_copy_if() @@ -127,7 +129,7 @@ _CCCL_HOST_DEVICE void replace_if( const T& new_value) { detail::constant_unary f(new_value); - thrust::transform_if(exec, first, last, first, first, f, pred); + thrust::transform_if(exec, first, last, first, first, f, ::cuda::std::move(pred)); } // end replace_if() template @@ -140,7 +142,7 @@ _CCCL_HOST_DEVICE void replace_if( const T& new_value) { detail::constant_unary f(new_value); - thrust::transform_if(exec, first, last, stencil, first, f, pred); + thrust::transform_if(exec, first, last, stencil, first, f, ::cuda::std::move(pred)); } // end replace_if() template diff --git a/thrust/thrust/system/detail/generic/scan.h b/thrust/thrust/system/detail/generic/scan.h index 7bc34dc74f3..e49e7bbaa39 100644 --- a/thrust/thrust/system/detail/generic/scan.h +++ b/thrust/thrust/system/detail/generic/scan.h @@ -18,6 +18,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace system::detail::generic @@ -57,7 +58,7 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan( T init) { // assume plus as the associative operator - return thrust::exclusive_scan(exec, first, last, result, init, ::cuda::std::plus<>()); + return thrust::exclusive_scan(exec, first, last, result, ::cuda::std::move(init), ::cuda::std::plus<>()); } // Note: it is an error to call this function: this should be provided by a backend system diff --git a/thrust/thrust/system/detail/generic/scan_by_key.inl b/thrust/thrust/system/detail/generic/scan_by_key.inl index 429f6c65d7e..bbdb9793e6d 100644 --- a/thrust/thrust/system/detail/generic/scan_by_key.inl +++ b/thrust/thrust/system/detail/generic/scan_by_key.inl @@ -22,6 +22,7 @@ #include #include +#include #include THRUST_NAMESPACE_BEGIN @@ -140,7 +141,8 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key( OutputIterator result, T init) { - return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, ::cuda::std::equal_to<>()); + return thrust::exclusive_scan_by_key( + exec, first1, last1, first2, result, ::cuda::std::move(init), ::cuda::std::equal_to<>()); } template ()); + return thrust::exclusive_scan_by_key( + exec, first1, last1, first2, result, ::cuda::std::move(init), binary_pred, ::cuda::std::plus<>()); } template #include +#include THRUST_NAMESPACE_BEGIN namespace system::detail::generic @@ -35,7 +36,7 @@ _CCCL_HOST_DEVICE void tabulate( // to avoid this, specify the counting_iterator's difference_type to be the same as ForwardIterator's. thrust::counting_iterator iter(0); - thrust::transform(exec, iter, iter + ::cuda::std::distance(first, last), first, unary_op); + thrust::transform(exec, iter, iter + ::cuda::std::distance(first, last), first, ::cuda::std::move(unary_op)); } // end tabulate() } // namespace system::detail::generic THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/detail/generic/transform.h b/thrust/thrust/system/detail/generic/transform.h index 51b6442a587..dad8c5c6871 100644 --- a/thrust/thrust/system/detail/generic/transform.h +++ b/thrust/thrust/system/detail/generic/transform.h @@ -20,6 +20,7 @@ #include #include +#include #include THRUST_NAMESPACE_BEGIN @@ -110,7 +111,10 @@ _CCCL_HOST_DEVICE OutputIterator transform( using ZipIterator = thrust::zip_iterator; ZipIterator zipped_result = thrust::for_each( - exec, thrust::make_zip_iterator(first, result), thrust::make_zip_iterator(last, result), UnaryTransformFunctor{op}); + exec, + thrust::make_zip_iterator(first, result), + thrust::make_zip_iterator(last, result), + UnaryTransformFunctor{::cuda::std::move(op)}); return ::cuda::std::get<1>(zipped_result.get_iterator_tuple()); } @@ -139,7 +143,7 @@ _CCCL_HOST_DEVICE OutputIterator transform( exec, thrust::make_zip_iterator(first1, first2, result), thrust::make_zip_iterator(last1, first2, result), - BinaryTransformFunctor{op}); + BinaryTransformFunctor{::cuda::std::move(op)}); return ::cuda::std::get<2>(zipped_result.get_iterator_tuple()); } @@ -197,7 +201,7 @@ _CCCL_HOST_DEVICE ForwardIterator transform_if( exec, thrust::make_zip_iterator(first, stencil, result), thrust::make_zip_iterator(last, stencil, result), - UnaryTransformIfFunctor{unary_op, pred}); + UnaryTransformIfFunctor{::cuda::std::move(unary_op), ::cuda::std::move(pred)}); return ::cuda::std::get<2>(zipped_result.get_iterator_tuple()); } diff --git a/thrust/thrust/system/detail/generic/transform_reduce.inl b/thrust/thrust/system/detail/generic/transform_reduce.inl index f40531ae3b5..c02ae0879b3 100644 --- a/thrust/thrust/system/detail/generic/transform_reduce.inl +++ b/thrust/thrust/system/detail/generic/transform_reduce.inl @@ -16,6 +16,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::generic { @@ -35,7 +37,7 @@ _CCCL_HOST_DEVICE OutputType transform_reduce( thrust::transform_iterator xfrm_first(first, unary_op); thrust::transform_iterator xfrm_last(last, unary_op); - return thrust::reduce(exec, xfrm_first, xfrm_last, init, binary_op); + return thrust::reduce(exec, xfrm_first, xfrm_last, ::cuda::std::move(init), binary_op); } // end transform_reduce() } // namespace system::detail::generic THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/detail/sequential/copy_if.h b/thrust/thrust/system/detail/sequential/copy_if.h index cf82177a8c2..ff82eb01d7d 100644 --- a/thrust/thrust/system/detail/sequential/copy_if.h +++ b/thrust/thrust/system/detail/sequential/copy_if.h @@ -19,6 +19,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::sequential { @@ -36,7 +38,7 @@ _CCCL_HOST_DEVICE OutputIterator copy_if( OutputIterator result, Predicate pred) { - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; while (first != last) { diff --git a/thrust/thrust/system/detail/sequential/find.h b/thrust/thrust/system/detail/sequential/find.h index 83cb583a198..cb53e2d9959 100644 --- a/thrust/thrust/system/detail/sequential/find.h +++ b/thrust/thrust/system/detail/sequential/find.h @@ -19,6 +19,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::sequential { @@ -28,7 +30,7 @@ _CCCL_HOST_DEVICE InputIterator find_if(execution_policy&, InputIterator first, InputIterator last, Predicate pred) { // wrap pred - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; while (first != last) { diff --git a/thrust/thrust/system/detail/sequential/for_each.h b/thrust/thrust/system/detail/sequential/for_each.h index aefdc787888..b0563915711 100644 --- a/thrust/thrust/system/detail/sequential/for_each.h +++ b/thrust/thrust/system/detail/sequential/for_each.h @@ -19,6 +19,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::sequential { @@ -28,7 +30,7 @@ _CCCL_HOST_DEVICE InputIterator for_each(sequential::execution_policy&, InputIterator first, InputIterator last, UnaryFunction f) { // wrap f - thrust::detail::wrapped_function wrapped_f{f}; + thrust::detail::wrapped_function wrapped_f{::cuda::std::move(f)}; for (; first != last; ++first) { @@ -43,7 +45,7 @@ _CCCL_HOST_DEVICE InputIterator for_each_n(sequential::execution_policy&, InputIterator first, Size n, UnaryFunction f) { // wrap f - thrust::detail::wrapped_function wrapped_f{f}; + thrust::detail::wrapped_function wrapped_f{::cuda::std::move(f)}; for (Size i = 0; i != n; i++) { diff --git a/thrust/thrust/system/detail/sequential/reduce.h b/thrust/thrust/system/detail/sequential/reduce.h index 8c003974229..1e49d10d916 100644 --- a/thrust/thrust/system/detail/sequential/reduce.h +++ b/thrust/thrust/system/detail/sequential/reduce.h @@ -19,6 +19,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::sequential { @@ -35,7 +37,7 @@ _CCCL_HOST_DEVICE OutputType reduce( thrust::detail::wrapped_function wrapped_binary_op{binary_op}; // initialize the result - OutputType result = init; + OutputType result = ::cuda::std::move(init); while (begin != end) { diff --git a/thrust/thrust/system/detail/sequential/remove.h b/thrust/thrust/system/detail/sequential/remove.h index 87f7f649ad1..3a85b748122 100644 --- a/thrust/thrust/system/detail/sequential/remove.h +++ b/thrust/thrust/system/detail/sequential/remove.h @@ -19,6 +19,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::detail::sequential { @@ -28,7 +30,7 @@ _CCCL_HOST_DEVICE ForwardIterator remove_if(sequential::execution_policy&, ForwardIterator first, ForwardIterator last, Predicate pred) { // wrap pred - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; // advance iterators until wrapped_pred(*first) is true or we reach the end of input while (first != last && !wrapped_pred(*first)) @@ -69,7 +71,7 @@ _CCCL_HOST_DEVICE ForwardIterator remove_if( Predicate pred) { // wrap pred - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; // advance iterators until wrapped_pred(*stencil) is true or we reach the end of input while (first != last && !wrapped_pred(*stencil)) @@ -113,7 +115,7 @@ _CCCL_HOST_DEVICE OutputIterator remove_copy_if( Predicate pred) { // wrap pred - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; while (first != last) { @@ -144,7 +146,7 @@ _CCCL_HOST_DEVICE OutputIterator remove_copy_if( Predicate pred) { // wrap pred - thrust::detail::wrapped_function wrapped_pred{pred}; + thrust::detail::wrapped_function wrapped_pred{::cuda::std::move(pred)}; while (first != last) { diff --git a/thrust/thrust/system/detail/sequential/scan.h b/thrust/thrust/system/detail/sequential/scan.h index 21cb904cfe1..048880e262a 100644 --- a/thrust/thrust/system/detail/sequential/scan.h +++ b/thrust/thrust/system/detail/sequential/scan.h @@ -22,6 +22,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace system::detail::sequential @@ -120,7 +121,7 @@ _CCCL_HOST_DEVICE OutputIterator exclusive_scan( if (first != last) { ValueType tmp = *first; // temporary value allows in-situ scan - ValueType sum = init; + ValueType sum = ::cuda::std::move(init); *result = sum; sum = binary_op(sum, tmp); diff --git a/thrust/thrust/system/omp/detail/copy_if.h b/thrust/thrust/system/omp/detail/copy_if.h index 82b93bfe4a4..90b7c18bbb9 100644 --- a/thrust/thrust/system/omp/detail/copy_if.h +++ b/thrust/thrust/system/omp/detail/copy_if.h @@ -16,6 +16,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::omp::detail { @@ -33,7 +35,7 @@ OutputIterator copy_if( Predicate pred) { // omp prefers generic::copy_if to cpp::copy_if - return thrust::system::detail::generic::copy_if(exec, first, last, stencil, result, pred); + return thrust::system::detail::generic::copy_if(exec, first, last, stencil, result, ::cuda::std::move(pred)); } // end copy_if() } // end namespace system::omp::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/omp/detail/find.h b/thrust/thrust/system/omp/detail/find.h index 0342d50815a..85cfb4415eb 100644 --- a/thrust/thrust/system/omp/detail/find.h +++ b/thrust/thrust/system/omp/detail/find.h @@ -20,6 +20,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::omp::detail { @@ -27,7 +29,7 @@ template InputIterator find_if(execution_policy& exec, InputIterator first, InputIterator last, Predicate pred) { // omp prefers generic::find_if to cpp::find_if - return thrust::system::detail::generic::find_if(exec, first, last, pred); + return thrust::system::detail::generic::find_if(exec, first, last, ::cuda::std::move(pred)); } } // end namespace system::omp::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/omp/detail/for_each.h b/thrust/thrust/system/omp/detail/for_each.h index da74e7c3f59..4b114ced643 100644 --- a/thrust/thrust/system/omp/detail/for_each.h +++ b/thrust/thrust/system/omp/detail/for_each.h @@ -26,6 +26,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace system::omp::detail @@ -48,7 +49,7 @@ RandomAccessIterator for_each_n(execution_policy&, RandomAccessIt } // create a wrapped function for f - thrust::detail::wrapped_function wrapped_f{f}; + thrust::detail::wrapped_function wrapped_f{::cuda::std::move(f)}; // use a signed type for the iteration variable or suffer the consequences of warnings using DifferenceType = thrust::detail::it_difference_t; @@ -68,7 +69,7 @@ template & s, RandomAccessIterator first, RandomAccessIterator last, UnaryFunction f) { - return omp::detail::for_each_n(s, first, ::cuda::std::distance(first, last), f); + return omp::detail::for_each_n(s, first, ::cuda::std::distance(first, last), ::cuda::std::move(f)); } // end for_each() } // end namespace system::omp::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/omp/detail/reduce.h b/thrust/thrust/system/omp/detail/reduce.h index d0d472db471..3e6fcd991ca 100644 --- a/thrust/thrust/system/omp/detail/reduce.h +++ b/thrust/thrust/system/omp/detail/reduce.h @@ -24,6 +24,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace system::omp::detail @@ -49,13 +50,14 @@ OutputType reduce(execution_policy& exec, thrust::detail::temporary_array partial_sums(exec, decomp1.size() + 1); // set first element of temp array to init - partial_sums[0] = init; + partial_sums[0] = ::cuda::std::move(init); // accumulate partial sums (first level reduction) thrust::system::omp::detail::reduce_intervals(exec, first, partial_sums.begin() + 1, binary_op, decomp1); // reduce partial sums (second level reduction) - thrust::system::omp::detail::reduce_intervals(exec, partial_sums.begin(), partial_sums.begin(), binary_op, decomp2); + thrust::system::omp::detail::reduce_intervals( + exec, partial_sums.begin(), partial_sums.begin(), ::cuda::std::move(binary_op), ::cuda::std::move(decomp2)); return partial_sums[0]; } // end reduce() diff --git a/thrust/thrust/system/omp/detail/remove.h b/thrust/thrust/system/omp/detail/remove.h index 3accf4660b2..e6460037cee 100644 --- a/thrust/thrust/system/omp/detail/remove.h +++ b/thrust/thrust/system/omp/detail/remove.h @@ -16,6 +16,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::omp::detail { @@ -24,7 +26,7 @@ ForwardIterator remove_if(execution_policy& exec, ForwardIterator first, ForwardIterator last, Predicate pred) { // omp prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, pred); + return thrust::system::detail::generic::remove_if(exec, first, last, ::cuda::std::move(pred)); } template @@ -36,7 +38,7 @@ ForwardIterator remove_if( Predicate pred) { // omp prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); + return thrust::system::detail::generic::remove_if(exec, first, last, stencil, ::cuda::std::move(pred)); } template @@ -44,7 +46,7 @@ OutputIterator remove_copy_if( execution_policy& exec, InputIterator first, InputIterator last, OutputIterator result, Predicate pred) { // omp prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); + return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, ::cuda::std::move(pred)); } template #include #include +#include #include @@ -71,7 +72,7 @@ OutputIterator scan_impl( return result; } - auto wrapped_binary_op = wrapped_function{binary_op}; + auto wrapped_binary_op = wrapped_function{::cuda::std::move(binary_op)}; const int num_threads = omp_get_max_threads(); @@ -83,16 +84,18 @@ OutputIterator scan_impl( { if constexpr (has_init) { - return ::cuda::std::inclusive_scan(first, last, result, wrapped_binary_op, init); + return ::cuda::std::inclusive_scan( + first, last, result, ::cuda::std::move(wrapped_binary_op), ::cuda::std::move(init)); } else { - return ::cuda::std::inclusive_scan(first, last, result, wrapped_binary_op); + return ::cuda::std::inclusive_scan(first, last, result, ::cuda::std::move(wrapped_binary_op)); } } else { - return ::cuda::std::exclusive_scan(first, last, result, init, wrapped_binary_op); + return ::cuda::std::exclusive_scan( + first, last, result, ::cuda::std::move(init), ::cuda::std::move(wrapped_binary_op)); } } @@ -179,7 +182,7 @@ OutputIterator inclusive_scan( OutputIterator result, BinaryFunction binary_op) { - return inclusive_scan(exec, first, last, result, __no_init_tag{}, binary_op); + return inclusive_scan(exec, first, last, result, __no_init_tag{}, ::cuda::std::move(binary_op)); } template (exec, first, last, result, init, binary_op); + return scan_impl(exec, first, last, result, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } template (exec, first, last, result, init, binary_op); + return scan_impl(exec, first, last, result, ::cuda::std::move(init), ::cuda::std::move(binary_op)); } } // namespace system::omp::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/tbb/detail/copy_if.h b/thrust/thrust/system/tbb/detail/copy_if.h index 2578cea4c97..d852f60b56a 100644 --- a/thrust/thrust/system/tbb/detail/copy_if.h +++ b/thrust/thrust/system/tbb/detail/copy_if.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -41,7 +42,7 @@ struct body : first(first) , stencil(stencil) , result(result) - , pred{pred} + , pred{::cuda::std::move(pred)} , sum(0) {} @@ -106,7 +107,7 @@ copy_if(tag, InputIterator1 first, InputIterator1 last, InputIterator2 stencil, if (n != 0) { - Body body(first, stencil, result, pred); + Body body(first, stencil, result, ::cuda::std::move(pred)); ::tbb::parallel_scan(::tbb::blocked_range(0, n), body); ::cuda::std::advance(result, body.sum); } diff --git a/thrust/thrust/system/tbb/detail/find.h b/thrust/thrust/system/tbb/detail/find.h index 0539ac1c76d..5015a4c0c85 100644 --- a/thrust/thrust/system/tbb/detail/find.h +++ b/thrust/thrust/system/tbb/detail/find.h @@ -15,6 +15,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::tbb::detail { @@ -22,7 +24,7 @@ template InputIterator find_if(execution_policy& exec, InputIterator first, InputIterator last, Predicate pred) { // tbb prefers generic::find_if to cpp::find_if - return thrust::system::detail::generic::find_if(exec, first, last, pred); + return thrust::system::detail::generic::find_if(exec, first, last, ::cuda::std::move(pred)); } } // end namespace system::tbb::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/tbb/detail/for_each.h b/thrust/thrust/system/tbb/detail/for_each.h index 86311bc0087..91a1037fa7d 100644 --- a/thrust/thrust/system/tbb/detail/for_each.h +++ b/thrust/thrust/system/tbb/detail/for_each.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -36,7 +37,7 @@ struct body body(RandomAccessIterator first, UnaryFunction f) : m_first(first) - , m_f(f) + , m_f(::cuda::std::move(f)) {} void operator()(const ::tbb::blocked_range& r) const @@ -49,14 +50,14 @@ struct body template body make_body(RandomAccessIterator first, UnaryFunction f) { - return body(first, f); + return body(first, ::cuda::std::move(f)); } // end make_body() } // namespace for_each_detail template RandomAccessIterator for_each_n(execution_policy&, RandomAccessIterator first, Size n, UnaryFunction f) { - ::tbb::parallel_for(::tbb::blocked_range(0, n), for_each_detail::make_body(first, f)); + ::tbb::parallel_for(::tbb::blocked_range(0, n), for_each_detail::make_body(first, ::cuda::std::move(f))); // return the end of the range return first + n; @@ -66,7 +67,7 @@ template & s, RandomAccessIterator first, RandomAccessIterator last, UnaryFunction f) { - return tbb::detail::for_each_n(s, first, ::cuda::std::distance(first, last), f); + return tbb::detail::for_each_n(s, first, ::cuda::std::distance(first, last), ::cuda::std::move(f)); } // end for_each() } // end namespace system::tbb::detail THRUST_NAMESPACE_END diff --git a/thrust/thrust/system/tbb/detail/reduce.h b/thrust/thrust/system/tbb/detail/reduce.h index 7b04e85073f..9637c579621 100644 --- a/thrust/thrust/system/tbb/detail/reduce.h +++ b/thrust/thrust/system/tbb/detail/reduce.h @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -43,7 +44,7 @@ struct body // note: we only initialize sum with init to avoid calling OutputType's default constructor body(RandomAccessIterator first, OutputType init, BinaryFunction binary_op) : first(first) - , sum(init) + , sum(::cuda::std::move(init)) , first_call(true) , binary_op{binary_op} {} @@ -114,7 +115,7 @@ OutputType reduce( using Body = typename reduce_detail::body; Body reduce_body(begin, init, binary_op); ::tbb::parallel_reduce(::tbb::blocked_range(0, n), reduce_body); - return binary_op(init, reduce_body.sum); + return binary_op(::cuda::std::move(init), reduce_body.sum); } } } // end namespace system::tbb::detail diff --git a/thrust/thrust/system/tbb/detail/remove.h b/thrust/thrust/system/tbb/detail/remove.h index 73872aeee26..f751e146cd8 100644 --- a/thrust/thrust/system/tbb/detail/remove.h +++ b/thrust/thrust/system/tbb/detail/remove.h @@ -15,6 +15,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system::tbb::detail { @@ -23,7 +25,7 @@ ForwardIterator remove_if(execution_policy& exec, ForwardIterator first, ForwardIterator last, Predicate pred) { // tbb prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, pred); + return thrust::system::detail::generic::remove_if(exec, first, last, ::cuda::std::move(pred)); } template @@ -35,7 +37,7 @@ ForwardIterator remove_if( Predicate pred) { // tbb prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); + return thrust::system::detail::generic::remove_if(exec, first, last, stencil, ::cuda::std::move(pred)); } template @@ -43,7 +45,7 @@ OutputIterator remove_copy_if( execution_policy& exec, InputIterator first, InputIterator last, OutputIterator result, Predicate pred) { // tbb prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); + return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, ::cuda::std::move(pred)); } template #include #include +#include #include #include @@ -45,8 +46,8 @@ struct inclusive_body inclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType init) : input(input) , output(output) - , binary_op{binary_op} - , sum(init) + , binary_op{::cuda::std::move(binary_op)} + , sum(::cuda::std::move(init)) , first_call(true) {} @@ -146,8 +147,8 @@ struct exclusive_body exclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType init) : input(input) , output(output) - , binary_op{binary_op} - , sum(init) + , binary_op{::cuda::std::move(binary_op)} + , sum(::cuda::std::move(init)) , first_call(true) {} @@ -258,7 +259,7 @@ OutputIterator inclusive_scan( if (n != 0) { using Body = typename scan_detail::inclusive_body; - Body scan_body(first, result, binary_op, init); + Body scan_body(first, result, binary_op, ::cuda::std::move(init)); ::tbb::parallel_scan(::tbb::blocked_range(0, n), scan_body); } @@ -282,7 +283,7 @@ OutputIterator exclusive_scan( if (n != 0) { using Body = typename scan_detail::exclusive_body; - Body scan_body(first, result, binary_op, init); + Body scan_body(first, result, binary_op, ::cuda::std::move(init)); ::tbb::parallel_scan(::tbb::blocked_range(0, n), scan_body); } diff --git a/thrust/thrust/transform.h b/thrust/thrust/transform.h index 678aa487fe0..5aa20807862 100644 --- a/thrust/thrust/transform.h +++ b/thrust/thrust/transform.h @@ -22,6 +22,7 @@ #include #include +#include // Include all active backend system implementations (generic, sequential, host and device) #include @@ -112,7 +113,8 @@ _CCCL_HOST_DEVICE OutputIterator transform( { _CCCL_NVTX_RANGE_SCOPE("thrust::transform"); using thrust::system::detail::generic::transform; - return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op); + return transform( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, ::cuda::std::move(op)); } //! This version of \p transform applies a unary function to each element of an input sequence and stores the result in @@ -254,7 +256,13 @@ _CCCL_HOST_DEVICE OutputIterator transform( { _CCCL_NVTX_RANGE_SCOPE("thrust::transform"); using thrust::system::detail::generic::transform; - return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, op); + return transform( + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first1, + last1, + first2, + result, + ::cuda::std::move(op)); } //! This version of \p transform applies a binary function to each pair of elements from two input sequences and stores @@ -585,7 +593,13 @@ _CCCL_HOST_DEVICE ForwardIterator transform_if( _CCCL_NVTX_RANGE_SCOPE("thrust::transform_if"); using thrust::system::detail::generic::transform_if; return transform_if( - thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, op, pred); + thrust::detail::derived_cast(thrust::detail::strip_const(exec)), + first, + last, + stencil, + result, + ::cuda::std::move(op), + ::cuda::std::move(pred)); } /*! This version of \p transform_if conditionally applies a unary function