Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/libcudacxx/extended_api/type_traits.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Type traits
:maxdepth: 1

type_traits/is_floating_point
type_traits/is_trivially_copyable_relaxed
type_traits/vector_types

.. list-table::
Expand All @@ -24,6 +25,11 @@ Type traits
- CCCL 3.0.0
- CUDA 13.0

* - :ref:`cuda::is_trivially_copyable_relaxed <libcudacxx-extended-api-type_traits-is_trivially_copyable_relaxed>`
- Relaxed trivially copyable check including extended floating-point types
- CCCL 3.4.0
- CUDA 13.4

* - :ref:`Vector Type Traits <libcudacxx-extended-api-type_traits-vector_types>`
- Type traits for CUDA vector types
- CCCL 3.3.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
.. _libcudacxx-extended-api-type_traits-is_trivially_copyable_relaxed:

``cuda::is_trivially_copyable_relaxed``
=======================================

Defined in the ``<cuda/type_traits>`` header.

.. code:: cuda

namespace cuda {

template <typename T>
struct is_trivially_copyable_relaxed;

template <typename T>
constexpr bool is_trivially_copyable_relaxed_v = is_trivially_copyable_relaxed<T>::value;

} // namespace cuda

``cuda::is_trivially_copyable_relaxed<T>`` is a type trait that extends ``cuda::std::is_trivially_copyable`` to also recognize CUDA extended floating-point scalar and vector types as trivially copyable.

A type ``T`` satisfies ``cuda::is_trivially_copyable_relaxed`` if any of the following holds:

- ``T`` is trivially copyable.
- ``T`` is an extended floating-point scalar type (e.g. ``__half``, ``__nv_bfloat16``, ``__nv_fp8_e4m3``).
- ``T`` is an extended floating-point vector type (e.g. ``__half2``, ``__nv_bfloat162``, ``__nv_fp8x2_e4m3``).

The trait also propagates through composite types:

- C-style arrays: ``T[N]`` and ``T[]`` are relaxed trivially copyable when ``T`` is.
- ``cuda::std::array<T, N>``: relaxed trivially copyable when ``T`` is.
- ``cuda::std::pair<T1, T2>``: relaxed trivially copyable when both ``T1`` and ``T2`` are.
- ``cuda::std::tuple<Ts...>``: relaxed trivially copyable when all ``Ts...`` are.

``const``, ``volatile``, and ``const volatile`` qualifications are handled transparently.

Custom Specialization
---------------------

Users may specialize ``cuda::is_trivially_copyable_relaxed`` for their own types whose memory representation is safe to copy
with ``memcpy`` but that the compiler does not consider trivially copyable.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this just UB?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not for the types that we care about. Said that, the user could provide an object that triggers UB. I can highlight this point in the documentation but we cannot do anything to explicitly prevent it.

A common case is a type that wraps extended floating-point fields and provides user-defined copy operations
solely to add ``__host__ __device__`` annotations:

.. code:: cuda

struct HalfWrapper {
__half value;
};

struct NonTriviallyCopyable {
__host__ __device__ NonTriviallyCopyable(const NonTriviallyCopyable&) {}
};

// Specializing the trait
template <>
struct cuda::is_trivially_copyable_relaxed<HalfWrapper> : cuda::std::true_type {};

template <>
struct cuda::is_trivially_copyable_relaxed<NonTriviallyCopyable> : cuda::std::true_type {};

static_assert(cuda::is_trivially_copyable_relaxed_v<HalfWrapper>);
static_assert(cuda::is_trivially_copyable_relaxed_v<NonTriviallyCopyable>);

Examples
--------

.. code:: cuda

#include <cuda/type_traits>
#include <cuda/std/array>
#include <cuda/std/tuple>
#include <cuda/std/utility>

#include <cuda_fp16.h>

// Standard trivially copyable types
static_assert(cuda::is_trivially_copyable_relaxed_v<int>);
static_assert(cuda::is_trivially_copyable_relaxed_v<float>);

// Extended floating-point types
static_assert(cuda::is_trivially_copyable_relaxed_v<__half>);
static_assert(cuda::is_trivially_copyable_relaxed_v<__nv_bfloat16>);
static_assert(cuda::is_trivially_copyable_relaxed_v<__half2>);

// Composite types containing extended floating-point types
static_assert(cuda::is_trivially_copyable_relaxed_v<__half[4]>);
static_assert(cuda::is_trivially_copyable_relaxed_v<cuda::std::array<__half, 4>>);
static_assert(cuda::is_trivially_copyable_relaxed_v<cuda::std::pair<__half, int>>);
static_assert(cuda::is_trivially_copyable_relaxed_v<cuda::std::tuple<__half, float, double>>);
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_RELAXED_H
#define __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_RELAXED_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__type_traits/is_vector_type.h>
#include <cuda/std/__cstddef/types.h>
#include <cuda/std/__fwd/array.h>
#include <cuda/std/__fwd/pair.h>
#include <cuda/std/__fwd/tuple.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_extended_floating_point.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__type_traits/remove_cv.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA

template <typename _Tp, typename _Up = ::cuda::std::remove_cv_t<_Tp>>
constexpr bool __is_trivially_copyable_relaxed_impl_v =
::cuda::std::__is_extended_floating_point_v<_Up> || ::cuda::is_extended_fp_vector_type_v<_Up>
|| ::cuda::std::is_trivially_copyable_v<_Up>;

template <typename _Tp>
struct is_trivially_copyable_relaxed : ::cuda::std::bool_constant<::cuda::__is_trivially_copyable_relaxed_impl_v<_Tp>>
{};

template <typename _Tp>
struct is_trivially_copyable_relaxed<_Tp[]> : is_trivially_copyable_relaxed<_Tp>
{};

template <typename _Tp, ::cuda::std::size_t _Size>
struct is_trivially_copyable_relaxed<_Tp[_Size]> : is_trivially_copyable_relaxed<_Tp>
{};

template <typename _Tp, ::cuda::std::size_t _Size>
struct is_trivially_copyable_relaxed<::cuda::std::array<_Tp, _Size>> : is_trivially_copyable_relaxed<_Tp>
{};

// cuda::std::pair
template <typename _T1, typename _T2>
struct is_trivially_copyable_relaxed<::cuda::std::pair<_T1, _T2>>
: ::cuda::std::bool_constant<__is_trivially_copyable_relaxed_impl_v<_T1> && __is_trivially_copyable_relaxed_impl_v<_T2>>
{};

// cuda::std::tuple
template <typename... _Ts>
struct is_trivially_copyable_relaxed<::cuda::std::tuple<_Ts...>>
: ::cuda::std::bool_constant<(__is_trivially_copyable_relaxed_impl_v<_Ts> && ...)>
{};

template <typename _Tp>
constexpr bool is_trivially_copyable_relaxed_v = is_trivially_copyable_relaxed<_Tp>::value;

_CCCL_END_NAMESPACE_CUDA

#include <cuda/std/__cccl/epilogue.h>

#endif // __CUDA__TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_RELAXED_H
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/type_traits
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#endif // no system header

#include <cuda/__type_traits/is_floating_point.h>
#include <cuda/__type_traits/is_trivially_copyable_relaxed.h>
#include <cuda/__type_traits/is_vector_type.h>
#include <cuda/__type_traits/vector_type.h>
#include <cuda/std/type_traits>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#include <cuda/std/array>
#include <cuda/std/tuple>
#include <cuda/std/utility>
#include <cuda/type_traits>

#include "test_macros.h"

template <class T>
__host__ __device__ void test_is_trivially_copyable_relaxed()
{
static_assert(cuda::is_trivially_copyable_relaxed<T>::value);
static_assert(cuda::is_trivially_copyable_relaxed<const T>::value);
static_assert(cuda::is_trivially_copyable_relaxed<volatile T>::value);
static_assert(cuda::is_trivially_copyable_relaxed<const volatile T>::value);
static_assert(cuda::is_trivially_copyable_relaxed_v<T>);
static_assert(cuda::is_trivially_copyable_relaxed_v<const T>);
static_assert(cuda::is_trivially_copyable_relaxed_v<volatile T>);
static_assert(cuda::is_trivially_copyable_relaxed_v<const volatile T>);
}

template <class T>
__host__ __device__ void test_is_not_trivially_copyable_relaxed()
{
static_assert(!cuda::is_trivially_copyable_relaxed<T>::value);
static_assert(!cuda::is_trivially_copyable_relaxed<const T>::value);
static_assert(!cuda::is_trivially_copyable_relaxed<volatile T>::value);
static_assert(!cuda::is_trivially_copyable_relaxed<const volatile T>::value);
static_assert(!cuda::is_trivially_copyable_relaxed_v<T>);
static_assert(!cuda::is_trivially_copyable_relaxed_v<const T>);
static_assert(!cuda::is_trivially_copyable_relaxed_v<volatile T>);
static_assert(!cuda::is_trivially_copyable_relaxed_v<const volatile T>);
}

struct TrivialPod
{
int x;
float y;
};

class NonTriviallyCopyable
{
public:
__host__ __device__ NonTriviallyCopyable(const NonTriviallyCopyable&) {}
};

__host__ __device__ void test()
{
// standard trivially copyable types
test_is_trivially_copyable_relaxed<int>();
test_is_trivially_copyable_relaxed<float>();
test_is_trivially_copyable_relaxed<double>();
test_is_trivially_copyable_relaxed<TrivialPod>();

// C-style arrays of trivially copyable types
static_assert(cuda::is_trivially_copyable_relaxed_v<int[4]>);
static_assert(cuda::is_trivially_copyable_relaxed_v<const int[4]>);
static_assert(cuda::is_trivially_copyable_relaxed_v<TrivialPod[2]>);

// cuda::std::array, pair, tuple of trivially copyable types
test_is_trivially_copyable_relaxed<cuda::std::array<int, 4>>();
test_is_trivially_copyable_relaxed<cuda::std::pair<int, float>>();
test_is_trivially_copyable_relaxed<cuda::std::tuple<int, float, double>>();
test_is_trivially_copyable_relaxed<cuda::std::tuple<>>();

// extended floating point scalar types
#if _CCCL_HAS_NVFP16()
test_is_trivially_copyable_relaxed<__half>();
#endif // _CCCL_HAS_NVFP16()
#if _CCCL_HAS_NVBF16()
test_is_trivially_copyable_relaxed<__nv_bfloat16>();
#endif // _CCCL_HAS_NVBF16()
#if _CCCL_HAS_NVFP8_E4M3()
test_is_trivially_copyable_relaxed<__nv_fp8_e4m3>();
#endif // _CCCL_HAS_NVFP8_E4M3()

// extended floating point vector types
#if _CCCL_HAS_NVFP16()
test_is_trivially_copyable_relaxed<__half2>();
#endif // _CCCL_HAS_NVFP16()
#if _CCCL_HAS_NVBF16()
test_is_trivially_copyable_relaxed<__nv_bfloat162>();
#endif // _CCCL_HAS_NVBF16()
#if _CCCL_HAS_NVFP8()
test_is_trivially_copyable_relaxed<__nv_fp8x2_e4m3>();
#endif // _CCCL_HAS_NVFP8()

// compositions of extended floating point types
#if _CCCL_HAS_NVFP16()
static_assert(cuda::is_trivially_copyable_relaxed_v<__half[4]>);
static_assert(cuda::is_trivially_copyable_relaxed_v<const __half[4]>);
test_is_trivially_copyable_relaxed<cuda::std::array<__half, 4>>();
test_is_trivially_copyable_relaxed<cuda::std::pair<__half, int>>();
test_is_trivially_copyable_relaxed<cuda::std::tuple<__half, float>>();
#endif // _CCCL_HAS_NVFP16()
#if _CCCL_HAS_NVBF16()
test_is_trivially_copyable_relaxed<cuda::std::array<__nv_bfloat16, 2>>();
test_is_trivially_copyable_relaxed<cuda::std::pair<__nv_bfloat16, int>>();
#endif // _CCCL_HAS_NVBF16()

// nested compositions
#if _CCCL_HAS_NVFP16()
test_is_trivially_copyable_relaxed<cuda::std::array<cuda::std::pair<__half, int>, 2>>();
test_is_trivially_copyable_relaxed<cuda::std::tuple<cuda::std::array<__half, 4>, int>>();
test_is_trivially_copyable_relaxed<cuda::std::pair<cuda::std::tuple<__half, float>, double>>();
#endif // _CCCL_HAS_NVFP16()

#if _CCCL_HAS_NVFP16() && _CCCL_HAS_NVBF16()
test_is_trivially_copyable_relaxed<cuda::std::tuple<__half, __nv_bfloat16, float>>();
#endif // _CCCL_HAS_NVFP16() && _CCCL_HAS_NVBF16()

// non-trivially copyable types
test_is_not_trivially_copyable_relaxed<NonTriviallyCopyable>();
}

int main(int, char**)
{
test();
return 0;
}
Loading