diff --git a/docs/libcudacxx/extended_api/type_traits.rst b/docs/libcudacxx/extended_api/type_traits.rst index 8588b77c375..36148f5af7a 100644 --- a/docs/libcudacxx/extended_api/type_traits.rst +++ b/docs/libcudacxx/extended_api/type_traits.rst @@ -8,6 +8,7 @@ Type traits :maxdepth: 1 type_traits/is_floating_point + type_traits/is_trivially_copyable type_traits/vector_types .. list-table:: @@ -28,3 +29,8 @@ Type traits - Type traits for CUDA vector types - CCCL 3.3.0 - CUDA 13.3 + + * - :ref:`cuda::is_trivially_copyable ` + - Relaxed trivially copyable check including extended floating-point types + - CCCL 3.4.0 + - CUDA 13.4 diff --git a/docs/libcudacxx/extended_api/type_traits/is_trivially_copyable.rst b/docs/libcudacxx/extended_api/type_traits/is_trivially_copyable.rst new file mode 100644 index 00000000000..f6164f04948 --- /dev/null +++ b/docs/libcudacxx/extended_api/type_traits/is_trivially_copyable.rst @@ -0,0 +1,68 @@ +.. _libcudacxx-extended-api-type_traits-is_trivially_copyable: + +``cuda::is_trivially_copyable`` +======================================= + +Defined in the ```` header. + +.. code:: cuda + + namespace cuda { + + template + constexpr bool is_trivially_copyable_v = /* see below */; + + template + using is_trivially_copyable = cuda::std::bool_constant>; + + } // namespace cuda + +``cuda::is_trivially_copyable_v`` is a variable template that extends ``cuda::std::is_trivially_copyable`` to also recognize CUDA extended floating-point scalar and vector types as trivially copyable. + +A type ``T`` satisfies ``cuda::is_trivially_copyable`` if any of the following holds: + +- ``T`` is trivially copyable. +- ``T`` is an extended floating-point scalar type (e.g. ``__half``, ``__nv_bfloat16``, ``__nv_fp8_e4m3``). +- ``T`` is an extended floating-point vector type (e.g. ``__half2``, ``__nv_bfloat162``, ``__nv_fp8x2_e4m3``). + +The trait also propagates through composite types: + +- C-style arrays: ``T[N]`` and ``T[]`` are trivially copyable when ``T`` is. +- ``cuda::std::array``: trivially copyable when ``T`` is also trivially copyable. +- ``cuda::std::pair``: trivially copyable when both ``T1`` and ``T2`` are. +- ``cuda::std::tuple``: trivially copyable when all ``Ts...`` are. +- ``cuda::std::complex``: trivially copyable when ``T`` is. +- ``cuda::complex``: trivially copyable when ``T`` is. +- Aggregates: trivially copyable when all members are. + +``const`` qualification is handled transparently, while ``volatile`` is compiler dependent. + +Examples +-------- + +.. code:: cuda + + #include + #include + #include + #include + + #include + + // Standard trivially copyable types + static_assert(cuda::is_trivially_copyable_v); + static_assert(cuda::is_trivially_copyable_v); + + // Extended floating-point types + static_assert(cuda::is_trivially_copyable_v<__half>); + static_assert(cuda::is_trivially_copyable_v<__nv_bfloat16>); + static_assert(cuda::is_trivially_copyable_v<__half2>); + static_assert(cuda::is_trivially_copyable_v>); + static_assert(cuda::is_trivially_copyable_v>); + + // Composite types containing extended floating-point types + static_assert(cuda::is_trivially_copyable_v<__half[4]>); + static_assert(cuda::is_trivially_copyable_v>); + static_assert(cuda::is_trivially_copyable_v>); + static_assert(cuda::is_trivially_copyable_v>); + static_assert(cuda::is_trivially_copyable_v>); diff --git a/docs/libcudacxx/standard_api/numerics_library/bit.rst b/docs/libcudacxx/standard_api/numerics_library/bit.rst index 9904db7352e..44079a4d81a 100644 --- a/docs/libcudacxx/standard_api/numerics_library/bit.rst +++ b/docs/libcudacxx/standard_api/numerics_library/bit.rst @@ -3,6 +3,16 @@ ```` ================== +``cuda::std::bit_cast`` +----------------------- + +``cuda::std::bit_cast`` extended the standard ``std::bit_cast`` to also recognize CUDA extended floating-point scalar and vector types as trivially copyable. + +**Limitations** + +- The function can be used in ``constexpr`` contexts only when the source and destination types are trivially copyable. +- The function cannot be used in ``constexpr`` contexts with MSVC <= 19.25 and GCC <= 10. + CUDA Performance Considerations ------------------------------- diff --git a/libcudacxx/include/cuda/__type_traits/is_trivially_copyable.h b/libcudacxx/include/cuda/__type_traits/is_trivially_copyable.h new file mode 100644 index 00000000000..61f6b9bc906 --- /dev/null +++ b/libcudacxx/include/cuda/__type_traits/is_trivially_copyable.h @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H +#define __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA + +template +inline constexpr bool __is_aggregate_trivially_copyable_v = false; + +//! Users are allowed to specialize this variable template for their own types +template +inline constexpr bool __is_trivially_copyable_v = + ::cuda::std::is_trivially_copyable_v<_Tp> || ::cuda::std::__is_extended_floating_point_v<_Tp> +#if _CCCL_HAS_CTK() + || ::cuda::is_extended_fp_vector_type_v<_Tp> +#endif // _CCCL_HAS_CTK() + || __is_aggregate_trivially_copyable_v<_Tp>; + +template +inline constexpr bool __is_trivially_copyable_v<_Tp[]> = __is_trivially_copyable_v<_Tp>; + +template +inline constexpr bool __is_trivially_copyable_v<_Tp[_Size]> = __is_trivially_copyable_v<_Tp>; + +template +inline constexpr bool __is_trivially_copyable_v<::cuda::std::array<_Tp, _Size>> = __is_trivially_copyable_v<_Tp>; + +template +inline constexpr bool __is_trivially_copyable_v<::cuda::std::pair<_T1, _T2>> = + __is_trivially_copyable_v<_T1> && __is_trivially_copyable_v<_T2>; + +template +inline constexpr bool __is_trivially_copyable_v<::cuda::std::tuple<_Ts...>> = (__is_trivially_copyable_v<_Ts> && ...); + +template +inline constexpr bool __is_trivially_copyable_v<::cuda::std::complex<_Tp>> = __is_trivially_copyable_v<_Tp>; + +template +inline constexpr bool __is_trivially_copyable_v<::cuda::complex<_Tp>> = __is_trivially_copyable_v<_Tp>; + +// if all the previous conditions fail, check if the type is an aggregate and all its members are trivially copyable +template +using __is_trivially_copyable_callable = ::cuda::std::bool_constant<__is_trivially_copyable_v<_Tp>>; + +template +inline constexpr bool + __is_aggregate_trivially_copyable_v<_Tp, ::cuda::std::enable_if_t<::cuda::std::is_aggregate_v<_Tp>>> = + ::cuda::std::__aggregate_all_of<__is_trivially_copyable_callable, _Tp>::value; + +//---------------------------------------------------------------------------------------------------------------------- +// public traits + +template +inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable_v<::cuda::std::remove_const_t<_Tp>>; + +// defined as alias so users cannot specialize it (they should specialize the variable template instead) +template +using is_trivially_copyable = ::cuda::std::bool_constant>; + +_CCCL_END_NAMESPACE_CUDA + +#include + +#endif // __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H diff --git a/libcudacxx/include/cuda/std/__bit/bit_cast.h b/libcudacxx/include/cuda/std/__bit/bit_cast.h index e7886ad4976..b17e02bdb40 100644 --- a/libcudacxx/include/cuda/std/__bit/bit_cast.h +++ b/libcudacxx/include/cuda/std/__bit/bit_cast.h @@ -4,7 +4,7 @@ // under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Copyright (c) 2024-26 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// @@ -21,10 +21,9 @@ # pragma system_header #endif // no system header -#include -#include -#include -#include +#include +#include +#include #include #include @@ -42,37 +41,43 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD #else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv # define _CCCL_CONSTEXPR_BIT_CAST # define _CCCL_HAS_CONSTEXPR_BIT_CAST() 0 -# if _CCCL_COMPILER(GCC, >=, 8) -// GCC starting with GCC8 warns about our extended floating point types having protected data members +#endif // !_CCCL_BUILTIN_BIT_CAST + +#if _CCCL_COMPILER(GCC, >=, 8) _CCCL_DIAG_PUSH _CCCL_DIAG_SUPPRESS_GCC("-Wclass-memaccess") -# endif // _CCCL_COMPILER(GCC, >=, 8) -#endif // !_CCCL_BUILTIN_BIT_CAST +#endif // _CCCL_COMPILER(GCC, >=, 8) -template = 0, - enable_if_t || __is_extended_floating_point_v<_To>, int> = 0, - enable_if_t || __is_extended_floating_point_v<_From>, int> = 0> -[[nodiscard]] _CCCL_API inline _CCCL_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept +template +[[nodiscard]] _CCCL_API inline _To __bit_cast_memcpy(const _From& __from) noexcept { -#if defined(_CCCL_BUILTIN_BIT_CAST) - return _CCCL_BUILTIN_BIT_CAST(_To, __from); -#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv - static_assert(is_trivially_default_constructible_v<_To>, - "The compiler does not support __builtin_bit_cast, so bit_cast additionally requires the destination " - "type to be trivially constructible"); + static_assert(::cuda::std::default_initializable<_To>, + "bit_cast memcpy fallback requires the destination type to be default initializable"); _To __temp; ::cuda::std::memcpy(&__temp, &__from, sizeof(_To)); return __temp; -#endif // !_CCCL_BUILTIN_BIT_CAST } -#if !defined(_CCCL_BUILTIN_BIT_CAST) -# if _CCCL_COMPILER(GCC, >=, 8) +#if _CCCL_COMPILER(GCC, >=, 8) _CCCL_DIAG_POP -# endif // _CCCL_COMPILER(GCC, >=, 8) -#endif // !_CCCL_BUILTIN_BIT_CAST +#endif // _CCCL_COMPILER(GCC, >=, 8) + +_CCCL_TEMPLATE(class _To, class _From) +_CCCL_REQUIRES((sizeof(_To) == sizeof(_From)) _CCCL_AND(::cuda::is_trivially_copyable_v<_To>) + _CCCL_AND(::cuda::is_trivially_copyable_v<_From>)) +[[nodiscard]] _CCCL_API inline _CCCL_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept +{ +#if defined(_CCCL_BUILTIN_BIT_CAST) + if constexpr (::cuda::std::is_trivially_copyable_v<_To> && ::cuda::std::is_trivially_copyable_v<_From>) + { + return _CCCL_BUILTIN_BIT_CAST(_To, __from); + } + else +#endif // _CCCL_BUILTIN_BIT_CAST + { + return ::cuda::std::__bit_cast_memcpy<_To>(__from); + } +} _CCCL_END_NAMESPACE_CUDA_STD diff --git a/libcudacxx/include/cuda/std/__type_traits/aggregate_members.h b/libcudacxx/include/cuda/std/__type_traits/aggregate_members.h new file mode 100644 index 00000000000..7ccdab175ea --- /dev/null +++ b/libcudacxx/include/cuda/std/__type_traits/aggregate_members.h @@ -0,0 +1,163 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_STD___TYPE_TRAITS_AGGREGATE_MEMBERS_H +#define _CUDA_STD___TYPE_TRAITS_AGGREGATE_MEMBERS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +#include + +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Wmissing-field-initializers") + +_CCCL_BEGIN_NAMESPACE_CUDA_STD + +#if defined(_CCCL_BUILTIN_STRUCTURED_BINDING_SIZE) + +// Returns the number of aggregate members, or `-1` if the type is not an aggregate. +template = 0, int> = 0> +inline constexpr int __aggregate_arity_v = _CCCL_BUILTIN_STRUCTURED_BINDING_SIZE(_Tp); + +#else // ^^^ _CCCL_BUILTIN_STRUCTURED_BINDING_SIZE ^^^ / !_CCCL_BUILTIN_STRUCTURED_BINDING_SIZE vvv + +// provide a generic way to initialize an aggregate member +struct __any_aggregate_member +{ + template + _CCCL_API constexpr operator _Tp&&() const; +}; + +template > +struct __aggregate_arity_impl +{ + template + _CCCL_API auto operator()(_Args... __args) -> decltype(_Self{}(__args..., __any_aggregate_member{})); + + template + _CCCL_API auto operator()(_Args...) const -> char (*)[sizeof...(_Args) + 1]; // return the number of members + 1 +}; + +// T is not an aggregate, return 1 +template +struct __aggregate_arity_impl<_Tp, false> +{ + _CCCL_API auto operator()() const -> char*; +}; + +// Returns the number of aggregate members, or `-1` if the type is not an aggregate. +template +inline constexpr int __aggregate_arity_v = int{sizeof(*__aggregate_arity_impl<_Tp>{}())} - 2; + +#endif // ^^^ !_CCCL_BUILTIN_STRUCTURED_BINDING_SIZE ^^^ + +// Apply a Predicate to every aggregate member + +// provide a generic way to initialize an aggregate member but only if the Predicate is true +template