From 713ab180edefd02d2609103717a11e08171b7383 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 7 Apr 2026 20:19:22 +0000 Subject: [PATCH 1/4] Add non-template ref-to-value converting constructor to __basic_any The __basic_any value specialization inherits converting constructors from templates that accept __basic_any<_OtherInterface>. When a proxy type wraps a __basic_any reference type (e.g. __basic_any<_Interface&>) and provides operator __basic_any<_Interface&>&(), these template constructors cannot match: template argument deduction deduces the proxy type, not the underlying __basic_any, and implicit user-defined conversions are not considered during deduction. Add a non-template constructor __basic_any(__basic_any<_Interface&> const&) for the ref-to-value conversion case. Non-template parameters participate in implicit conversion sequences, allowing proxy types to convert through their user-defined conversion operator. This fixes any_resource construction from proxy-wrapped resource_ref and any_synchronous_resource from proxy-wrapped synchronous_resource_ref without requiring per-type overloads. Fixes #8316 --- .../cuda/__utility/__basic_any/basic_any_value.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h index 2fd5ae0184a..743bbf34bdd 100644 --- a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h +++ b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h @@ -160,13 +160,25 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __basic_any : __basic_any_base<_Interface> //! `__icopyable<>`. //! @post `has_value() == __other.has_value()`. 
_CCCL_TEMPLATE(class _OtherInterface) - _CCCL_REQUIRES((!::cuda::std::same_as<_OtherInterface, _Interface>) - _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface> const&, __basic_any>) + _CCCL_REQUIRES( + (!::cuda::std::same_as<_OtherInterface, _Interface>) _CCCL_AND(!::cuda::std::same_as<_OtherInterface, _Interface&>) + _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface> const&, __basic_any>) _CCCL_API __basic_any(__basic_any<_OtherInterface> const& __other) { __convert_from(__other); } + //! @brief Non-template converting constructor from the corresponding + //! reference type `__basic_any<_Interface&>`. This enables implicit + //! conversion from proxy types (e.g. Cython's __Pyx_FakeReference) + //! that wrap a `__basic_any<_Interface&>` and provide + //! `operator __basic_any<_Interface&>&()`, since non-template parameters + //! participate in implicit conversion sequences. + _CCCL_API __basic_any(__basic_any<_Interface&> const& __other) + { + __convert_from(__other); + } + #if _CCCL_COMPILER(CLANG, <, 12) || _CCCL_COMPILER(GCC, <, 11) // Older versions of clang and gcc need help disambiguating between // __basic_any<__ireference> and __basic_any. From 3818b258bd522fb95eee6a772610df671c15f59e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 7 Apr 2026 20:48:57 +0000 Subject: [PATCH 2/4] Add tests for any_resource/any_synchronous_resource from proxy-wrapped refs Add regression tests for #8316 verifying that any_resource and any_synchronous_resource can be constructed from a value_proxy that wraps resource_ref or synchronous_resource_ref via operator T&(), mimicking Cython's __Pyx_FakeReference proxy pattern. 
--- .../any_resource/any_resource.cu | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu b/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu index 8815e8f81d9..65d69fd8f7a 100644 --- a/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu +++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu @@ -348,4 +348,61 @@ TEST_CASE("regression test for NVIDIA/cccl#8037", "[container][resource]") { STATIC_REQUIRE(cuda::std::move_constructible); } + +// Minimal proxy that wraps a value and provides operator T&(), mimicking +// Cython's __Pyx_FakeReference which wraps intermediate expression results. +template +struct value_proxy +{ + T value; + + template + explicit value_proxy(Args&&... args) + : value(std::forward(args)...) + {} + + operator T&() + { + return value; + } +}; + +// See https://github.com/NVIDIA/cccl/issues/8316 +TEST_CASE("any_resource from proxy-wrapped resource_ref", "[container][resource]") +{ + host_device_resource mr; + cuda::mr::resource_ref ref{mr}; + + SECTION("direct construction from resource_ref") + { + cuda::mr::any_resource any{ref}; + CHECK(any == ref); + } + + SECTION("construction from proxy-wrapped resource_ref") + { + value_proxy> proxy{mr}; + cuda::mr::any_resource any{proxy}; + CHECK(any == ref); + } +} + +TEST_CASE("any_synchronous_resource from proxy-wrapped synchronous_resource_ref", "[container][resource]") +{ + host_device_resource mr; + cuda::mr::synchronous_resource_ref ref{mr}; + + SECTION("direct construction from synchronous_resource_ref") + { + cuda::mr::any_synchronous_resource any{ref}; + CHECK(any == ref); + } + + SECTION("construction from proxy-wrapped synchronous_resource_ref") + { + value_proxy> proxy{mr}; + cuda::mr::any_synchronous_resource any{proxy}; + CHECK(any == ref); + } +} #endif // __CUDA_ARCH__ From 107475473cacf4900079b1e5212dd785c896758b 
Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 7 Apr 2026 16:14:48 -0500 Subject: [PATCH 3/4] Update libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h Co-authored-by: Eric Niebler --- libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h index 743bbf34bdd..a6a10ac5e99 100644 --- a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h +++ b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h @@ -161,7 +161,7 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __basic_any : __basic_any_base<_Interface> //! @post `has_value() == __other.has_value()`. _CCCL_TEMPLATE(class _OtherInterface) _CCCL_REQUIRES( - (!::cuda::std::same_as<_OtherInterface, _Interface>) _CCCL_AND(!::cuda::std::same_as<_OtherInterface, _Interface&>) + (!::cuda::std::same_as<_OtherInterface&, _Interface&>) _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface> const&, __basic_any>) _CCCL_API __basic_any(__basic_any<_OtherInterface> const& __other) { From 69b44f59707c1a1c0219656387eb3d7f3b4289a9 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 10 Apr 2026 16:14:41 +0000 Subject: [PATCH 4/4] Constrain ref-to-value constructors, add move overload, fix tests - Add enable_if<__copyable> to the non-template converting constructors from __basic_any<_Interface&>, since converting a ref to a value requires copying the referenced object. - Add the corresponding move constructor from __basic_any<_Interface&>&&. - Update the template move converting constructor's same_as guard to use reference comparison (same_as<_OtherInterface&, _Interface&>) for consistency with the copy converting constructor. - In tests, use CHECK((expr)) to prevent Catch2 expression decomposition from triggering an nvcc auto NTTP SFINAE bug via ADL. 
- Guard proxy construction tests with _CCCL_CUDA_COMPILER(NVCC, <, 12, 9) since older nvcc treats auto NTTP deduction failures as hard errors. - Add rvalue proxy test sections. --- .../__utility/__basic_any/basic_any_value.h | 20 +++++++---- .../any_resource/any_resource.cu | 34 +++++++++++++++---- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h index a6a10ac5e99..c8fd21c8b58 100644 --- a/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h +++ b/libcudacxx/include/cuda/__utility/__basic_any/basic_any_value.h @@ -147,7 +147,7 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __basic_any : __basic_any_base<_Interface> //! @post `__other.has_value() == false` and `has_value()` is `true` if and //! only if `__other.has_value()` was `true`. _CCCL_TEMPLATE(class _OtherInterface) - _CCCL_REQUIRES((!::cuda::std::same_as<_OtherInterface, _Interface>) + _CCCL_REQUIRES((!::cuda::std::same_as<_OtherInterface&, _Interface&>) _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface>, __basic_any>) _CCCL_API __basic_any(__basic_any<_OtherInterface>&& __other) { @@ -160,25 +160,33 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __basic_any : __basic_any_base<_Interface> //! `__icopyable<>`. //! @post `has_value() == __other.has_value()`. _CCCL_TEMPLATE(class _OtherInterface) - _CCCL_REQUIRES( - (!::cuda::std::same_as<_OtherInterface&, _Interface&>) - _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface> const&, __basic_any>) + _CCCL_REQUIRES((!::cuda::std::same_as<_OtherInterface&, _Interface&>) + _CCCL_AND __any_convertible_to<__basic_any<_OtherInterface> const&, __basic_any>) _CCCL_API __basic_any(__basic_any<_OtherInterface> const& __other) { __convert_from(__other); } - //! @brief Non-template converting constructor from the corresponding - //! reference type `__basic_any<_Interface&>`. This enables implicit + //! 
@brief Non-template converting constructors from the corresponding + //! reference type `__basic_any<_Interface&>`. These enable implicit //! conversion from proxy types (e.g. Cython's __Pyx_FakeReference) //! that wrap a `__basic_any<_Interface&>` and provide //! `operator __basic_any<_Interface&>&()`, since non-template parameters //! participate in implicit conversion sequences. + //! @pre `_Interface` must extend `__icopyable<>` (converting a ref to + //! a value requires copying the referenced object). + template = 0> _CCCL_API __basic_any(__basic_any<_Interface&> const& __other) { __convert_from(__other); } + template = 0> + _CCCL_API __basic_any(__basic_any<_Interface&>&& __other) + { + __convert_from(__other); + } + #if _CCCL_COMPILER(CLANG, <, 12) || _CCCL_COMPILER(GCC, <, 11) // Older versions of clang and gcc need help disambiguating between // __basic_any<__ireference> and __basic_any. diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu b/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu index 65d69fd8f7a..94322509696 100644 --- a/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu +++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/any_resource/any_resource.cu @@ -376,15 +376,28 @@ TEST_CASE("any_resource from proxy-wrapped resource_ref", "[container][resource] SECTION("direct construction from resource_ref") { cuda::mr::any_resource any{ref}; - CHECK(any == ref); + // Extra parens prevent Catch2's expression decomposition, which triggers + // an nvcc bug with auto NTTP deduction in __satisfies during ADL. + CHECK((any == ref)); } - SECTION("construction from proxy-wrapped resource_ref") +# if !_CCCL_CUDA_COMPILER(NVCC, <, 12, 9) + // nvcc before CTK 12.9 has a bug where auto NTTP deduction failures in + // __satisfies are hard errors instead of SFINAE during overload resolution. 
+ SECTION("construction from lvalue proxy-wrapped resource_ref") { value_proxy> proxy{mr}; cuda::mr::any_resource any{proxy}; - CHECK(any == ref); + CHECK((any == ref)); } + + SECTION("construction from rvalue proxy-wrapped resource_ref") + { + cuda::mr::any_resource any{ + value_proxy>{mr}}; + CHECK((any == ref)); + } +# endif // !_CCCL_CUDA_COMPILER(NVCC, <, 12, 9) } TEST_CASE("any_synchronous_resource from proxy-wrapped synchronous_resource_ref", "[container][resource]") @@ -395,14 +408,23 @@ TEST_CASE("any_synchronous_resource from proxy-wrapped synchronous_resource_ref" SECTION("direct construction from synchronous_resource_ref") { cuda::mr::any_synchronous_resource any{ref}; - CHECK(any == ref); + CHECK((any == ref)); } - SECTION("construction from proxy-wrapped synchronous_resource_ref") +# if !_CCCL_CUDA_COMPILER(NVCC, <, 12, 9) + SECTION("construction from lvalue proxy-wrapped synchronous_resource_ref") { value_proxy> proxy{mr}; cuda::mr::any_synchronous_resource any{proxy}; - CHECK(any == ref); + CHECK((any == ref)); + } + + SECTION("construction from rvalue proxy-wrapped synchronous_resource_ref") + { + cuda::mr::any_synchronous_resource any{ + value_proxy>{mr}}; + CHECK((any == ref)); } +# endif // !_CCCL_CUDA_COMPILER(NVCC, <, 12, 9) } #endif // __CUDA_ARCH__