Skip to content
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/libcudacxx/extended_api/type_traits.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Type traits
:maxdepth: 1

type_traits/is_floating_point
type_traits/is_trivially_copyable
type_traits/vector_types

.. list-table::
Expand All @@ -28,3 +29,8 @@ Type traits
- Type traits for CUDA vector types
- CCCL 3.3.0
- CUDA 13.3

* - :ref:`cuda::is_trivially_copyable <libcudacxx-extended-api-type_traits-is_trivially_copyable>`
- Relaxed trivially copyable check including extended floating-point types
- CCCL 3.4.0
- CUDA 13.4
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
.. _libcudacxx-extended-api-type_traits-is_trivially_copyable:

``cuda::is_trivially_copyable``
=======================================

Defined in the ``<cuda/type_traits>`` header.

.. code:: cuda

namespace cuda {

template <typename T>
constexpr bool is_trivially_copyable_v = /* see below */;

template <typename T>
using is_trivially_copyable = cuda::std::bool_constant<is_trivially_copyable_v<T>>;

} // namespace cuda

``cuda::is_trivially_copyable_v<T>`` is a variable template that extends ``cuda::std::is_trivially_copyable`` to also recognize CUDA extended floating-point scalar and vector types as trivially copyable.

A type ``T`` satisfies ``cuda::is_trivially_copyable`` if any of the following holds:

- ``T`` is trivially copyable.
- ``T`` is an extended floating-point scalar type (e.g. ``__half``, ``__nv_bfloat16``, ``__nv_fp8_e4m3``).
- ``T`` is an extended floating-point vector type (e.g. ``__half2``, ``__nv_bfloat162``, ``__nv_fp8x2_e4m3``).

The trait also propagates through composite types:

- C-style arrays: ``T[N]`` and ``T[]`` are trivially copyable when ``T`` is.
- ``cuda::std::array<T, N>``: trivially copyable when ``T`` is.
- ``cuda::std::pair<T1, T2>``: trivially copyable when both ``T1`` and ``T2`` are.
- ``cuda::std::tuple<Ts...>``: trivially copyable when all ``Ts...`` are.
- Aggregates: trivially copyable when all members are.

``const`` qualification is handled transparently (``const T`` yields the same result as ``T``), while the result for ``volatile``-qualified types is compiler dependent.

Examples
--------

.. code:: cuda

#include <cuda/type_traits>
#include <cuda/std/array>
#include <cuda/std/tuple>
#include <cuda/std/utility>

#include <cuda_bf16.h>
#include <cuda_fp16.h>

// Standard trivially copyable types
static_assert(cuda::is_trivially_copyable_v<int>);
static_assert(cuda::is_trivially_copyable_v<float>);

// Extended floating-point types
static_assert(cuda::is_trivially_copyable_v<__half>);
static_assert(cuda::is_trivially_copyable_v<__nv_bfloat16>);
static_assert(cuda::is_trivially_copyable_v<__half2>);

// Composite types containing extended floating-point types
static_assert(cuda::is_trivially_copyable_v<__half[4]>);
static_assert(cuda::is_trivially_copyable_v<cuda::std::array<__half, 4>>);
static_assert(cuda::is_trivially_copyable_v<cuda::std::pair<__half, __half>>);
static_assert(cuda::is_trivially_copyable_v<cuda::std::tuple<__half, __half>>);
static_assert(cuda::is_trivially_copyable_v<cuda::std::pair<__half, int>>);
84 changes: 84 additions & 0 deletions libcudacxx/include/cuda/__type_traits/is_trivially_copyable.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
#define __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__type_traits/is_vector_type.h>
#include <cuda/std/__cstddef/types.h>
#include <cuda/std/__fwd/array.h>
#include <cuda/std/__fwd/pair.h>
#include <cuda/std/__fwd/tuple.h>
#include <cuda/std/__type_traits/aggregate_members.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_extended_floating_point.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__type_traits/remove_const.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA

// Detection hook used by the relaxed trait: "is this an aggregate whose members are all trivially copyable?".
// The primary template answers `false`; the unnamed second parameter exists so that a partial specialization
// can be enabled through SFINAE for the types where the question can actually be answered.
template <class _Up, class = void>
constexpr bool __is_aggregate_trivially_copyable_v = false;

//! Relaxed trivially-copyable query. After dropping a top-level `const` from the queried type, the result is
//! `true` when any of the following holds:
//!  - the standard trait `cuda::std::is_trivially_copyable_v` is satisfied,
//!  - the type is an extended floating-point type,
//!  - the type is an extended floating-point vector type (only checked when the CUDA toolkit is available),
//!  - the aggregate detection hook `__is_aggregate_trivially_copyable_v` is satisfied.
//!
//! Users are allowed to specialize this variable template for their own types
template <class _Up>
constexpr bool is_trivially_copyable_v =
  ::cuda::std::is_trivially_copyable_v<::cuda::std::remove_const_t<_Up>>
  || ::cuda::std::__is_extended_floating_point_v<::cuda::std::remove_const_t<_Up>>
#if _CCCL_HAS_CTK()
  || ::cuda::is_extended_fp_vector_type_v<::cuda::std::remove_const_t<_Up>>
#endif // _CCCL_HAS_CTK()
  || __is_aggregate_trivially_copyable_v<::cuda::std::remove_const_t<_Up>>;

template <typename _Tp>
constexpr bool is_trivially_copyable_v<_Tp[]> = is_trivially_copyable_v<_Tp>;

template <typename _Tp, ::cuda::std::size_t _Size>
constexpr bool is_trivially_copyable_v<_Tp[_Size]> = is_trivially_copyable_v<_Tp>;

template <typename _Tp, ::cuda::std::size_t _Size>
constexpr bool is_trivially_copyable_v<::cuda::std::array<_Tp, _Size>> = is_trivially_copyable_v<_Tp>;

// cuda::std::pair<T1, T2> satisfies the relaxed trait when both element types do.
template <typename _T1, typename _T2>
constexpr bool is_trivially_copyable_v<::cuda::std::pair<_T1, _T2>> =
  is_trivially_copyable_v<_T1> && is_trivially_copyable_v<_T2>;

// A const-qualified pair does not match the specialization above and would fall back to the primary template.
// The primary template only applies the standard trait, the extended floating-point checks and the aggregate
// check, so e.g. `const pair<__half2, int>` could be reported differently from `pair<__half2, int>`.
// Forward the const case explicitly so that `const` never changes the result.
template <typename _T1, typename _T2>
constexpr bool is_trivially_copyable_v<const ::cuda::std::pair<_T1, _T2>> =
  is_trivially_copyable_v<::cuda::std::pair<_T1, _T2>>;

// cuda::std::tuple<Ts...> satisfies the relaxed trait when every element type does (an empty pack yields true).
template <typename... _Ts>
constexpr bool is_trivially_copyable_v<::cuda::std::tuple<_Ts...>> = (is_trivially_copyable_v<_Ts> && ...);

// A const-qualified tuple does not match the specialization above and would fall back to the primary template,
// which does not look at the element types of a tuple. Forward the const case explicitly so that `const`
// never changes the result.
template <typename... _Ts>
constexpr bool is_trivially_copyable_v<const ::cuda::std::tuple<_Ts...>> =
  is_trivially_copyable_v<::cuda::std::tuple<_Ts...>>;

// Last resort of the relaxed trait: when none of the other conditions hold, an aggregate still qualifies if
// every one of its members does.

// Adapter exposing the variable template as a type with a `::value` member, as required by __aggregate_all_of.
template <class _Member>
using __is_trivially_copyable_callable = ::cuda::std::bool_constant<is_trivially_copyable_v<_Member>>;

// Only selected for aggregates (SFINAE through the second template parameter of the primary template).
template <class _Aggregate>
constexpr bool
  __is_aggregate_trivially_copyable_v<_Aggregate, ::cuda::std::enable_if_t<::cuda::std::is_aggregate_v<_Aggregate>>> =
    ::cuda::std::__aggregate_all_of<__is_trivially_copyable_callable, _Aggregate>::value;

// Type form of the trait. It is deliberately an alias: aliases cannot be specialized, so the variable template
// `is_trivially_copyable_v` remains the single customization point for users.
template <class _Up>
using is_trivially_copyable = ::cuda::std::bool_constant<is_trivially_copyable_v<_Up>>;

_CCCL_END_NAMESPACE_CUDA

#include <cuda/std/__cccl/epilogue.h>

#endif // __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
55 changes: 31 additions & 24 deletions libcudacxx/include/cuda/std/__bit/bit_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
// SPDX-FileCopyrightText: Copyright (c) 2024-26 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

Expand All @@ -21,9 +21,8 @@
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_extended_floating_point.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/is_trivially_default_constructible.h>
#include <cuda/std/cstring>

Expand All @@ -42,37 +41,45 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
# define _CCCL_CONSTEXPR_BIT_CAST
# define _CCCL_HAS_CONSTEXPR_BIT_CAST() 0
# if _CCCL_COMPILER(GCC, >=, 8)
// GCC starting with GCC8 warns about our extended floating point types having protected data members
#endif // !_CCCL_BUILTIN_BIT_CAST

#if _CCCL_COMPILER(GCC, >=, 8)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_GCC("-Wclass-memaccess")
# endif // _CCCL_COMPILER(GCC, >=, 8)
#endif // !_CCCL_BUILTIN_BIT_CAST
#endif // _CCCL_COMPILER(GCC, >=, 8)

template <class _To,
class _From,
enable_if_t<(sizeof(_To) == sizeof(_From)), int> = 0,
enable_if_t<is_trivially_copyable_v<_To> || __is_extended_floating_point_v<_To>, int> = 0,
enable_if_t<is_trivially_copyable_v<_From> || __is_extended_floating_point_v<_From>, int> = 0>
[[nodiscard]] _CCCL_API inline _CCCL_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept
template <class _To, class _From>
[[nodiscard]] _CCCL_API inline _To __bit_cast_memcpy(const _From& __from) noexcept
{
#if defined(_CCCL_BUILTIN_BIT_CAST)
return _CCCL_BUILTIN_BIT_CAST(_To, __from);
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
static_assert(is_trivially_default_constructible_v<_To>,
"The compiler does not support __builtin_bit_cast, so bit_cast additionally requires the destination "
"type to be trivially constructible");
static_assert(::cuda::std::is_trivially_default_constructible_v<_To>,
"bit_cast requires the destination type to be trivially constructible");
_To __temp;
::cuda::std::memcpy(&__temp, &__from, sizeof(_To));
return __temp;
#endif // !_CCCL_BUILTIN_BIT_CAST
}

#if !defined(_CCCL_BUILTIN_BIT_CAST)
# if _CCCL_COMPILER(GCC, >=, 8)
#if _CCCL_COMPILER(GCC, >=, 8)
_CCCL_DIAG_POP
# endif // _CCCL_COMPILER(GCC, >=, 8)
#endif // _CCCL_COMPILER(GCC, >=, 8)

_CCCL_TEMPLATE(class _To, class _From)
_CCCL_REQUIRES((sizeof(_To) == sizeof(_From)) _CCCL_AND(::cuda::is_trivially_copyable_v<_To>)
_CCCL_AND(::cuda::is_trivially_copyable_v<_From>))
[[nodiscard]] _CCCL_API inline _CCCL_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept
{
#if defined(_CCCL_BUILTIN_BIT_CAST)
if constexpr (::cuda::std::is_trivially_copyable_v<_To> && ::cuda::std::is_trivially_copyable_v<_From>)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the trick: we know that `__nv_bfloat162` and `__half2` are not trivially copyable, but we also know that these types are not usable in constant expressions, so we can dispatch on this property.

{
return _CCCL_BUILTIN_BIT_CAST(_To, __from);
}
else
{
return ::cuda::std::__bit_cast_memcpy<_To>(__from);
}
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
return ::cuda::std::__bit_cast_memcpy<_To>(__from);
#endif // !_CCCL_BUILTIN_BIT_CAST
}

_CCCL_END_NAMESPACE_CUDA_STD

Expand Down
Loading
Loading