Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
1b913c9
Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
6636949
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
6845498
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
06c787a
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
822f769
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
034f5e0
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
9460495
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
8832ccf
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
a375276
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 2, 2026
906be40
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 3, 2026
c3fedf1
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 3, 2026
4487121
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 3, 2026
4dc713b
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 3, 2026
5377ca2
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 6, 2026
d3686ef
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 6, 2026
f391578
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 8, 2026
7a784d6
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 8, 2026
a07b14d
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 8, 2026
77259a0
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 8, 2026
bc037d4
fixup! Replace thrust::discard_iterator with cuda::discard_iterator
Jacobfaib Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cub/cub/device/device_transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cuda/__iterator/zip_iterator.h>
#include <cuda/__stream/get_stream.h>
#include <cuda/std/__execution/env.h>
#include <cuda/std/__iterator/iterator_traits.h>
#include <cuda/std/tuple>

CUB_NAMESPACE_BEGIN
Expand Down Expand Up @@ -433,7 +434,7 @@ struct DeviceTransform
{
static_assert(::cuda::std::is_invocable_v<Generator>, "The passed generator must be a nullary function object");
static_assert(
::cuda::std::is_assignable_v<detail::it_reference_t<RandomAccessIteratorOut>,
::cuda::std::is_assignable_v<::cuda::std::iter_reference_t<RandomAccessIteratorOut>,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important: This is a change in semantics, why is this necessary?

See:

// the following iterator helpers are not named iter_value_t etc, like the C++20 facilities, because they are defined in
// terms of C++17 iterator_traits and not the new C++20 indirectly_readable trait etc. This allows them to detect nested
// value_type, difference_type and reference aliases, which the new C+20 traits do not consider (they only consider
// specializations of iterator_traits). Also, a value_type of void remains supported (needed by some output iterators).
template <typename It>
using it_value_t = typename ::cuda::std::iterator_traits<It>::value_type;
template <typename It>
using it_reference_t = typename ::cuda::std::iterator_traits<It>::reference;
template <typename It>
using it_difference_t = typename ::cuda::std::iterator_traits<It>::difference_type;
template <typename It>
using it_pointer_t = typename ::cuda::std::iterator_traits<It>::pointer;

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the other comment. There is a change in value_type between the iterators which is not handled by CUB.

::cuda::std::invoke_result_t<Generator>>,
"The return value of the generator's call operator must be assignable to the dereferenced output iterator");

Expand Down Expand Up @@ -499,7 +500,7 @@ struct DeviceTransform
CUB_RUNTIME_FUNCTION static cudaError_t
Fill(RandomAccessIteratorOut output, NumItemsT num_items, Value value, Env env = {})
{
static_assert(::cuda::std::is_assignable_v<detail::it_reference_t<RandomAccessIteratorOut>, Value>,
static_assert(::cuda::std::is_assignable_v<::cuda::std::iter_reference_t<RandomAccessIteratorOut>, Value>,
"The passed value must be assignable to the dereferenced output iterator");

_CCCL_NVTX_RANGE_SCOPE("cub::DeviceTransform::Fill");
Expand Down
29 changes: 15 additions & 14 deletions cub/cub/device/dispatch/dispatch_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1029,20 +1029,21 @@ template <typename AccumT,
typename ScanOpT,
typename InitValueT,
typename OffsetT,
typename PolicySelector = policy_selector_from_types<detail::it_value_t<InputIteratorT>,
detail::it_value_t<OutputIteratorT>,
AccumT,
OffsetT,
ScanOpT>,
typename KernelSource = DeviceScanKernelSource<
PolicySelector,
THRUST_NS_QUALIFIER::try_unwrap_contiguous_iterator_t<InputIteratorT>,
THRUST_NS_QUALIFIER::try_unwrap_contiguous_iterator_t<OutputIteratorT>,
ScanOpT,
InitValueT,
OffsetT,
AccumT,
EnforceInclusive>,
typename PolicySelector =
policy_selector_from_types<detail::non_void_value_t<InputIteratorT, detail::it_value_t<OutputIteratorT>>,
detail::non_void_value_t<OutputIteratorT, detail::it_value_t<InputIteratorT>>,
AccumT,
OffsetT,
ScanOpT>,
typename KernelSource = DeviceScanKernelSource<
PolicySelector,
THRUST_NS_QUALIFIER::try_unwrap_contiguous_iterator_t<InputIteratorT>,
THRUST_NS_QUALIFIER::try_unwrap_contiguous_iterator_t<OutputIteratorT>,
ScanOpT,
InitValueT,
OffsetT,
AccumT,
EnforceInclusive>,
typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE auto dispatch_with_accum(
void* d_temp_storage,
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/device/dispatch/kernels/kernel_scan_warpspeed.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cub/device/dispatch/kernels/scan_warpspeed_policy.cuh>
#include <cub/thread/thread_reduce.cuh>
#include <cub/thread/thread_scan.cuh>
#include <cub/util_type.cuh>
#include <cub/warp/warp_reduce.cuh>
#include <cub/warp/warp_scan.cuh>

Expand Down Expand Up @@ -935,8 +936,8 @@ _CCCL_API constexpr bool use_warpspeed(
template <typename InputIteratorT, typename OutputIteratorT, typename AccumT>
_CCCL_API constexpr bool use_warpspeed(const scan_warpspeed_policy& policy)
{
using InputT = it_value_t<InputIteratorT>;
using OutputT = it_value_t<OutputIteratorT>;
using InputT = non_void_value_t<InputIteratorT, it_value_t<OutputIteratorT>>;
using OutputT = non_void_value_t<OutputIteratorT, it_value_t<InputIteratorT>>;
Comment on lines +939 to +940
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important: This is probably wrong, since we cannot just take the traits of the input type in case we have no output type.

Why is this necessary? Which test is failing?

Copy link
Copy Markdown
Contributor Author

@Jacobfaib Jacobfaib Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cuda::discard_iterator::value_type is void, while thrust::discard_iterator::value_type is not. Many CUB algorithms seemingly don't handle this properly and/or had special handling for thrust::discard_iterator (or other proxy iterators) and so failed to compile with the new discard iterator.

return use_warpspeed(
policy,
static_cast<int>(sizeof(InputT)),
Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_select_flagged_if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

#include <cub/device/device_select.cuh>

#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/logical.h>

#include <cuda/functional>
#include <cuda/iterator>

#include <algorithm>

Expand Down Expand Up @@ -38,7 +38,7 @@ get_reference(c2h::device_vector<T> const& in, c2h::device_vector<FlagT> const&
auto zipped_in_it = thrust::make_zip_iterator(h_flags.cbegin(), reference.cbegin());

// Discards the flags part and only keeps the items
auto zipped_out_it = thrust::make_zip_iterator(thrust::make_discard_iterator(), reference.begin());
auto zipped_out_it = thrust::make_zip_iterator(cuda::make_discard_iterator(), reference.begin());

auto end =
std::copy_if(zipped_in_it, zipped_in_it + in.size(), zipped_out_it, predicate_op_wrapper_t<Pred>{if_predicate});
Expand Down
2 changes: 1 addition & 1 deletion cub/test/catch2_test_nvrtc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ TEST_CASE("Test nvrtc", "[test][nvrtc]")
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <cuda/iterator>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important: Please revert. We want to continue to test the old iterator with NVRTC.

#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/reverse_iterator.h>
#include <thrust/iterator/tabulate_output_iterator.h>
Expand Down
7 changes: 3 additions & 4 deletions cub/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
#include <cub/util_ptx.cuh>
#include <cub/util_type.cuh>

#include <thrust/iterator/discard_iterator.h>

#include <cuda/iterator>
#include <cuda/std/algorithm>

#include <nv/target>
Expand Down Expand Up @@ -1150,10 +1149,10 @@ inline int CompareDeviceResults(
* Verify the contents of a device array match those
* of a host array
*/
template <typename S, typename OffsetT>
template <typename S>
int CompareDeviceResults(
S* /*h_reference*/,
THRUST_NS_QUALIFIER::discard_iterator<OffsetT> /*d_data*/,
cuda::discard_iterator /*d_data*/,
std::size_t /*num_items*/,
bool /*verbose*/ = true,
bool /*display_data*/ = false)
Expand Down
21 changes: 21 additions & 0 deletions libcudacxx/include/cuda/__iterator/discard_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cuda/std/__iterator/concepts.h>
#include <cuda/std/__iterator/default_sentinel.h>
#include <cuda/std/__iterator/iterator_traits.h>
#include <cuda/std/__iterator/readable_traits.h>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/__type_traits/remove_cvref.h>
#include <cuda/std/cstdint>
Expand Down Expand Up @@ -76,6 +77,12 @@ class discard_iterator
public:
struct __discard_proxy
{
constexpr __discard_proxy() noexcept = default;

template <class _Tp>
_CCCL_API constexpr explicit __discard_proxy(const _Tp&) noexcept
{}

_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES((!::cuda::std::is_same_v<::cuda::std::remove_cvref_t<_Tp>, __discard_proxy>) )
_CCCL_API constexpr const __discard_proxy& operator=(_Tp&&) const noexcept
Expand Down Expand Up @@ -320,6 +327,20 @@ _CCCL_REQUIRES(::cuda::std::__integer_like<_Integer>)

_CCCL_END_NAMESPACE_CUDA

_CCCL_BEGIN_NAMESPACE_CUDA_STD

// We need to specialize these traits because discard_iterator's value_type is usually
// void, but you are allowed to read (and write) to it nonetheless. If we don't specialize
// here, then quite a few downstream algorithms (like zip's) break because they only see a
// value_type of void.
template <>
struct indirectly_readable_traits<::cuda::discard_iterator>
{
using value_type = ::cuda::discard_iterator::__discard_proxy;
};

_CCCL_END_NAMESPACE_CUDA_STD

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA___ITERATOR_DISCARD_ITERATOR_H
1 change: 1 addition & 0 deletions thrust/cmake/ThrustHeaderTesting.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(
"thrust/iterator/tabulate_output_iterator\\.h"
"thrust/iterator/strided_iterator\\.h"
"thrust/iterator/constant_iterator\\.h"
"thrust/iterator/discard_iterator\\.h"
)

cccl_get_cudatoolkit()
Expand Down
5 changes: 3 additions & 2 deletions thrust/examples/set_operations.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/merge.h>
#include <thrust/set_operations.h>

#include <cuda/iterator>

#include <iostream>

// This example illustrates use of the set operation algorithms
Expand Down Expand Up @@ -127,7 +128,7 @@ template <typename Vector>
void SetIntersectionSize(const Vector& A, const Vector& B)
{
// computes the exact size of the intersection without allocating output
thrust::discard_iterator<> C_begin, C_end;
cuda::discard_iterator C_begin, C_end;

C_end = thrust::set_intersection(A.begin(), A.end(), B.begin(), B.end(), C_begin);

Expand Down
3 changes: 1 addition & 2 deletions thrust/examples/sum_columns.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/random.h>
#include <thrust/reduce.h>
Expand Down Expand Up @@ -53,7 +52,7 @@ int main()
// Sum each column, storing the result in a new vector.
thrust::universal_vector<int> sums(cols);
thrust::reduce_by_key(
thrust::device, col_idx_begin, col_idx_end, M_transposed, thrust::make_discard_iterator(), sums.begin());
thrust::device, col_idx_begin, col_idx_end, M_transposed, cuda::make_discard_iterator(), sums.begin());

// Output the result.
thrust::for_each_n(thrust::seq, flat_idx, rows, [&](int i) {
Expand Down
3 changes: 1 addition & 2 deletions thrust/examples/sum_rows.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/random.h>
#include <thrust/reduce.h>
Expand Down Expand Up @@ -45,7 +44,7 @@ int main()
// Sum each row, storing the result in a new vector.
thrust::universal_vector<int> sums(rows);
thrust::reduce_by_key(
thrust::device, row_idx_begin, row_idx_end, M.data_handle(), thrust::make_discard_iterator(), sums.begin());
thrust::device, row_idx_begin, row_idx_end, M.data_handle(), cuda::make_discard_iterator(), sums.begin());

// Output the result.
thrust::for_each_n(thrust::seq, flat_idx, rows, [&](int i) {
Expand Down
33 changes: 17 additions & 16 deletions thrust/testing/binary_search_vector.cu
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <thrust/binary_search.h>
#include <thrust/detail/allocator/allocator_system.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/retag.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>

#include <cuda/iterator>

#include <unittest/unittest.h>

//////////////////////
Expand Down Expand Up @@ -343,12 +344,12 @@ struct TestVectorLowerBoundDiscardIterator
thrust::host_vector<T> h_input = unittest::random_integers<T>(2 * n);
thrust::device_vector<T> d_input = h_input;

thrust::discard_iterator<> h_result =
thrust::lower_bound(h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), thrust::make_discard_iterator());
thrust::discard_iterator<> d_result =
thrust::lower_bound(d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), thrust::make_discard_iterator());
cuda::discard_iterator h_result =
thrust::lower_bound(h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), cuda::make_discard_iterator());
cuda::discard_iterator d_result =
thrust::lower_bound(d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), cuda::make_discard_iterator());

thrust::discard_iterator<> reference(2 * n);
cuda::discard_iterator reference(2 * n);

ASSERT_EQUAL_QUIET(reference, h_result);
ASSERT_EQUAL_QUIET(reference, d_result);
Expand All @@ -368,12 +369,12 @@ struct TestVectorUpperBoundDiscardIterator
thrust::host_vector<T> h_input = unittest::random_integers<T>(2 * n);
thrust::device_vector<T> d_input = h_input;

thrust::discard_iterator<> h_result =
thrust::upper_bound(h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), thrust::make_discard_iterator());
thrust::discard_iterator<> d_result =
thrust::upper_bound(d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), thrust::make_discard_iterator());
cuda::discard_iterator h_result =
thrust::upper_bound(h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), cuda::make_discard_iterator());
cuda::discard_iterator d_result =
thrust::upper_bound(d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), cuda::make_discard_iterator());

thrust::discard_iterator<> reference(2 * n);
cuda::discard_iterator reference(2 * n);

ASSERT_EQUAL_QUIET(reference, h_result);
ASSERT_EQUAL_QUIET(reference, d_result);
Expand All @@ -393,12 +394,12 @@ struct TestVectorBinarySearchDiscardIterator
thrust::host_vector<T> h_input = unittest::random_integers<T>(2 * n);
thrust::device_vector<T> d_input = h_input;

thrust::discard_iterator<> h_result = thrust::binary_search(
h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), thrust::make_discard_iterator());
thrust::discard_iterator<> d_result = thrust::binary_search(
d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), thrust::make_discard_iterator());
cuda::discard_iterator h_result =
thrust::binary_search(h_vec.begin(), h_vec.end(), h_input.begin(), h_input.end(), cuda::make_discard_iterator());
cuda::discard_iterator d_result =
thrust::binary_search(d_vec.begin(), d_vec.end(), d_input.begin(), d_input.end(), cuda::make_discard_iterator());

thrust::discard_iterator<> reference(2 * n);
cuda::discard_iterator reference(2 * n);

ASSERT_EQUAL_QUIET(reference, h_result);
ASSERT_EQUAL_QUIET(reference, d_result);
Expand Down
13 changes: 7 additions & 6 deletions thrust/testing/catch2_test_adjacent_difference.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#include <thrust/adjacent_difference.h>
#include <thrust/device_free.h>
#include <thrust/device_malloc.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/retag.h>

#include <cuda/iterator>

#include "catch2_test_helper.h"
#include <unittest/random.h>
#include <unittest/special_types.h>
Expand Down Expand Up @@ -112,12 +113,12 @@ TEMPLATE_LIST_TEST_CASE("AdjacentDifferenceDiscardIterator", "[adjacent_differen
thrust::host_vector<T> h_input = unittest::random_samples<T>(n);
thrust::device_vector<T> d_input = h_input;

thrust::discard_iterator<> h_result =
thrust::adjacent_difference(h_input.begin(), h_input.end(), thrust::make_discard_iterator());
thrust::discard_iterator<> d_result =
thrust::adjacent_difference(d_input.begin(), d_input.end(), thrust::make_discard_iterator());
cuda::discard_iterator h_result =
thrust::adjacent_difference(h_input.begin(), h_input.end(), cuda::make_discard_iterator());
cuda::discard_iterator d_result =
thrust::adjacent_difference(d_input.begin(), d_input.end(), cuda::make_discard_iterator());

thrust::discard_iterator<> reference(n);
cuda::discard_iterator reference(n);

CHECK((reference == h_result));
CHECK((reference == d_result));
Expand Down
Loading
Loading