Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions libcudacxx/include/nv/detail/__target_macros
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,16 @@
//----------------------------------------------------------------------------------------------------------------------
// architecture-specific SM versions

# if defined(__CUDA_ARCH_SPECIFIC__)
# define _NV_TARGET_CHECK_ARCH_SPECIFIC(_MAJOR, _MINOR) (__CUDA_ARCH_SPECIFIC__ == (_MAJOR * 100 + _MINOR * 10))
# else
# define _NV_TARGET_CHECK_ARCH_SPECIFIC(_MAJOR, _MINOR) (0)
# endif

// Re-enable sm_90a support in nvcc.
# undef NV_HAS_FEATURE_SM_90a
# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a
# if defined(__CUDA_ARCH_FEAT_SM90_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 900))
# if defined(__CUDA_ARCH_FEAT_SM90_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(9, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 0
Expand All @@ -465,7 +471,7 @@
// Re-enable sm_100a support in nvcc.
# undef NV_HAS_FEATURE_SM_100a
# define NV_HAS_FEATURE_SM_100a __NV_HAS_FEATURE_SM_100a
# if defined(__CUDA_ARCH_FEAT_SM100_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1000))
# if defined(__CUDA_ARCH_FEAT_SM100_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(10, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100a 0
Expand All @@ -474,7 +480,7 @@
// Re-enable sm_103a support in nvcc.
# undef NV_HAS_FEATURE_SM_103a
# define NV_HAS_FEATURE_SM_103a __NV_HAS_FEATURE_SM_103a
# if defined(__CUDA_ARCH_FEAT_SM103_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1030))
# if defined(__CUDA_ARCH_FEAT_SM103_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(10, 3)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103a 0
Expand All @@ -483,7 +489,7 @@
// Re-enable sm_110a support in nvcc.
# undef NV_HAS_FEATURE_SM_110a
# define NV_HAS_FEATURE_SM_110a __NV_HAS_FEATURE_SM_110a
# if defined(__CUDA_ARCH_FEAT_SM110_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1100))
# if defined(__CUDA_ARCH_FEAT_SM110_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(11, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110a 0
Expand All @@ -492,7 +498,7 @@
// Re-enable sm_120a support in nvcc.
# undef NV_HAS_FEATURE_SM_120a
# define NV_HAS_FEATURE_SM_120a __NV_HAS_FEATURE_SM_120a
# if defined(__CUDA_ARCH_FEAT_SM120_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1200))
# if defined(__CUDA_ARCH_FEAT_SM120_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(12, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120a 0
Expand All @@ -501,7 +507,7 @@
// Re-enable sm_121a support in nvcc.
# undef NV_HAS_FEATURE_SM_121a
# define NV_HAS_FEATURE_SM_121a __NV_HAS_FEATURE_SM_121a
# if defined(__CUDA_ARCH_FEAT_SM121_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1210))
# if defined(__CUDA_ARCH_FEAT_SM121_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(12, 1)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121a 0
Expand All @@ -510,10 +516,18 @@
//----------------------------------------------------------------------------------------------------------------------
// family-specific SM versions

# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__)
# define _NV_TARGET_CHECK_FAMILY_SPECIFIC(_MAJOR, _MINOR) \
(__CUDA_ARCH_FAMILY_SPECIFIC__ >= (_MAJOR * 100 + _MINOR * 10) \
&& __CUDA_ARCH_FAMILY_SPECIFIC__ < (_MAJOR + 1) * 100)
# else
# define _NV_TARGET_CHECK_FAMILY_SPECIFIC(_MAJOR, _MINOR) (0)
# endif

// Re-enable sm_100f support in nvcc.
# undef NV_HAS_FEATURE_SM_100f
# define NV_HAS_FEATURE_SM_100f __NV_HAS_FEATURE_SM_100f
# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1000)
# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(10, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100f 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100f 0
Expand All @@ -522,7 +536,7 @@
// Re-enable sm_103f support in nvcc.
# undef NV_HAS_FEATURE_SM_103f
# define NV_HAS_FEATURE_SM_103f __NV_HAS_FEATURE_SM_103f
# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1030)
# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(10, 3)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103f 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103f 0
Expand All @@ -531,7 +545,7 @@
// Re-enable sm_110f support in nvcc.
# undef NV_HAS_FEATURE_SM_110f
# define NV_HAS_FEATURE_SM_110f __NV_HAS_FEATURE_SM_110f
# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1100)
# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(11, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110f 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110f 0
Expand All @@ -540,7 +554,7 @@
// Re-enable sm_120f support in nvcc.
# undef NV_HAS_FEATURE_SM_120f
# define NV_HAS_FEATURE_SM_120f __NV_HAS_FEATURE_SM_120f
# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1200)
# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(12, 0)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120f 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120f 0
Expand All @@ -549,7 +563,7 @@
// Re-enable sm_121f support in nvcc.
# undef NV_HAS_FEATURE_SM_121f
# define NV_HAS_FEATURE_SM_121f __NV_HAS_FEATURE_SM_121f
# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1210)
# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(12, 1)
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121f 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121f 0
Expand Down
49 changes: 3 additions & 46 deletions libcudacxx/test/nvtarget/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,48 +1,5 @@
# This unit just checks that nv/target can be used with the specified dialects.

set(nvtarget_dialects 11 14 17 20)

set_directory_properties(
PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include"
)

add_custom_target(libcudacxx.test.nvtarget)

foreach (dialect ${nvtarget_dialects})
if ("cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
add_library(
"libcudacxx.test.nvtarget.cpp${dialect}"
OBJECT
nvtargettest.cpp
)
# This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient.
set_target_properties(
"libcudacxx.test.nvtarget.cpp${dialect}"
PROPERTIES CXX_STANDARD "${dialect}" CXX_STANDARD_REQUIRED ON
)
add_dependencies(
libcudacxx.test.nvtarget
"libcudacxx.test.nvtarget.cpp${dialect}"
)
endif()

if (
"cuda_std_${dialect}" IN_LIST CMAKE_CUDA_COMPILE_FEATURES
AND "cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES
)
add_library(
"libcudacxx.test.nvtarget.cuda${dialect}"
OBJECT
nvtargettest.cu
)
# This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient.
set_target_properties(
"libcudacxx.test.nvtarget.cuda${dialect}"
PROPERTIES CUDA_STANDARD "${dialect}" CUDA_STANDARD_REQUIRED ON
)
add_dependencies(
libcudacxx.test.nvtarget
"libcudacxx.test.nvtarget.cuda${dialect}"
)
endif()
endforeach()
add_subdirectory(arch_specific)
add_subdirectory(dialect)
add_subdirectory(family_specific)
20 changes: 20 additions & 0 deletions libcudacxx/test/nvtarget/arch_specific/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
if (NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA")
return()
endif()

# arch-specific features are supported since 12.9
if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.9")
return()
endif()

set_directory_properties(
PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include"
)

set(target_name "libcudacxx.test.nvtarget.arch_specific")
add_library(${target_name} OBJECT arch_specific.cu)
set_target_properties(
${target_name}
PROPERTIES CUDA_ARCHITECTURES "103a-virtual"
)
add_dependencies(libcudacxx.test.nvtarget ${target_name})
59 changes: 59 additions & 0 deletions libcudacxx/test/nvtarget/arch_specific/arch_specific.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// This test checks if arch-specific NV target macros work properly.

#include <nv/target>

// Currently, nvcc is the only compiler that supports arch-specific architectures.
#if !defined(__NVCC__)
# error "This test works with nvcc only."
#endif // !__NVCC__

#if defined(__CUDA_ARCH__)
# if __CUDA_ARCH_SPECIFIC__ != 1030
# error "This test must be compiled for sm_103a target."
# endif // __CUDA_ARCH_SPECIFIC__ != 1030
#endif // __CUDA_ARCH__

#define CHECK_TRUE(_PRED) \
do \
{ \
NV_IF_ELSE_TARGET(_PRED, static_assert(true);, static_assert(false);) \
} while (0)
#define CHECK_FALSE(_PRED) \
do \
{ \
NV_IF_ELSE_TARGET(_PRED, static_assert(false);, static_assert(true);) \
} while (0)

__host__ __device__ void fn()
{
#if defined(__CUDA_ARCH__)
CHECK_TRUE(NV_IS_EXACTLY_SM_103);

CHECK_TRUE(NV_HAS_FEATURE_SM_103a);

CHECK_TRUE(NV_HAS_FEATURE_SM_100f);
CHECK_TRUE(NV_HAS_FEATURE_SM_103f);
#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv
CHECK_TRUE(NV_IS_HOST);

CHECK_FALSE(NV_HAS_FEATURE_SM_103a);

CHECK_FALSE(NV_HAS_FEATURE_SM_100f);
CHECK_FALSE(NV_HAS_FEATURE_SM_103f);
#endif // ^^^ !__CUDA_ARCH__ ^^^

CHECK_FALSE(NV_HAS_FEATURE_SM_100a);
CHECK_FALSE(NV_HAS_FEATURE_SM_110a);

CHECK_FALSE(NV_HAS_FEATURE_SM_110f);
}
47 changes: 47 additions & 0 deletions libcudacxx/test/nvtarget/dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This unit just checks that nv/target can be used with the specified dialects.

set(nvtarget_dialects 11 14 17 20)

# cudafe++ 12.9 has problem compiling some parts of <type_traits> when using libstdc++ 14, so let's just filter out
# dialect version 11.
if (
"${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA"
AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "12.9"
AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "13.0"
AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"
AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL "14.0"
AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "15.0"
)
list(FILTER nvtarget_dialects EXCLUDE REGEX "^11$")
endif()

set_directory_properties(
PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include"
)

foreach (dialect ${nvtarget_dialects})
if ("cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
set(target_name "libcudacxx.test.nvtarget.dialect.cpp${dialect}")
add_library(${target_name} OBJECT nvtargettest.cpp)
# This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient.
set_target_properties(
${target_name}
PROPERTIES CXX_STANDARD "${dialect}" CXX_STANDARD_REQUIRED ON
)
add_dependencies(libcudacxx.test.nvtarget ${target_name})
endif()

if (
"cuda_std_${dialect}" IN_LIST CMAKE_CUDA_COMPILE_FEATURES
AND "cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES
)
set(target_name "libcudacxx.test.nvtarget.dialect.cuda${dialect}")
add_library(${target_name} OBJECT nvtargettest.cu)
# This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient.
set_target_properties(
${target_name}
PROPERTIES CUDA_STANDARD "${dialect}" CUDA_STANDARD_REQUIRED ON
)
add_dependencies(libcudacxx.test.nvtarget ${target_name})
endif()
endforeach()
13 changes: 13 additions & 0 deletions libcudacxx/test/nvtarget/dialect/nvtargettest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#include <nv/target>

void fn() {}
13 changes: 13 additions & 0 deletions libcudacxx/test/nvtarget/dialect/nvtargettest.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#include <nv/target>

__host__ __device__ void fn() {}
20 changes: 20 additions & 0 deletions libcudacxx/test/nvtarget/family_specific/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
if (NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA")
return()
endif()

# family-specific features are supported since 12.9
if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.9")
return()
endif()

set_directory_properties(
PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include"
)

set(target_name "libcudacxx.test.nvtarget.family_specific")
add_library(${target_name} OBJECT family_specific.cu)
set_target_properties(
${target_name}
PROPERTIES CUDA_ARCHITECTURES "103f-virtual"
)
add_dependencies(libcudacxx.test.nvtarget ${target_name})
Loading
Loading