diff --git a/libcudacxx/include/nv/detail/__target_macros b/libcudacxx/include/nv/detail/__target_macros index 8d3c2bcf521..d21c38be2ee 100644 --- a/libcudacxx/include/nv/detail/__target_macros +++ b/libcudacxx/include/nv/detail/__target_macros @@ -453,10 +453,16 @@ //---------------------------------------------------------------------------------------------------------------------- // architecture-specific SM versions +# if defined(__CUDA_ARCH_SPECIFIC__) +# define _NV_TARGET_CHECK_ARCH_SPECIFIC(_MAJOR, _MINOR) (__CUDA_ARCH_SPECIFIC__ == (_MAJOR * 100 + _MINOR * 10)) +# else +# define _NV_TARGET_CHECK_ARCH_SPECIFIC(_MAJOR, _MINOR) (0) +# endif + // Re-enable sm_90a support in nvcc. # undef NV_HAS_FEATURE_SM_90a # define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a -# if defined(__CUDA_ARCH_FEAT_SM90_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 900)) +# if defined(__CUDA_ARCH_FEAT_SM90_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(9, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 0 @@ -465,7 +471,7 @@ // Re-enable sm_100a support in nvcc. # undef NV_HAS_FEATURE_SM_100a # define NV_HAS_FEATURE_SM_100a __NV_HAS_FEATURE_SM_100a -# if defined(__CUDA_ARCH_FEAT_SM100_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1000)) +# if defined(__CUDA_ARCH_FEAT_SM100_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(10, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100a 0 @@ -474,7 +480,7 @@ // Re-enable sm_103a support in nvcc. # undef NV_HAS_FEATURE_SM_103a # define NV_HAS_FEATURE_SM_103a __NV_HAS_FEATURE_SM_103a -# if defined(__CUDA_ARCH_FEAT_SM103_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1030)) +# if defined(__CUDA_ARCH_FEAT_SM103_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(10, 3) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103a 0 @@ -483,7 +489,7 @@ // Re-enable sm_110a support in nvcc. # undef NV_HAS_FEATURE_SM_110a # define NV_HAS_FEATURE_SM_110a __NV_HAS_FEATURE_SM_110a -# if defined(__CUDA_ARCH_FEAT_SM110_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1100)) +# if defined(__CUDA_ARCH_FEAT_SM110_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(11, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110a 0 @@ -492,7 +498,7 @@ // Re-enable sm_120a support in nvcc. # undef NV_HAS_FEATURE_SM_120a # define NV_HAS_FEATURE_SM_120a __NV_HAS_FEATURE_SM_120a -# if defined(__CUDA_ARCH_FEAT_SM120_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1200)) +# if defined(__CUDA_ARCH_FEAT_SM120_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(12, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120a 0 @@ -501,7 +507,7 @@ // Re-enable sm_121a support in nvcc. # undef NV_HAS_FEATURE_SM_121a # define NV_HAS_FEATURE_SM_121a __NV_HAS_FEATURE_SM_121a -# if defined(__CUDA_ARCH_FEAT_SM121_ALL) || (defined(__CUDA_ARCH_SPECIFIC__) && (__CUDA_ARCH_SPECIFIC__ == 1210)) +# if defined(__CUDA_ARCH_FEAT_SM121_ALL) || _NV_TARGET_CHECK_ARCH_SPECIFIC(12, 1) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121a 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121a 0 @@ -510,10 +516,18 @@ //---------------------------------------------------------------------------------------------------------------------- // family-specific SM versions +# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) +# define _NV_TARGET_CHECK_FAMILY_SPECIFIC(_MAJOR, _MINOR) \ + (__CUDA_ARCH_FAMILY_SPECIFIC__ >= (_MAJOR * 100 + _MINOR * 10) \ + && __CUDA_ARCH_FAMILY_SPECIFIC__ < (_MAJOR + 1) * 100) +# else +# define _NV_TARGET_CHECK_FAMILY_SPECIFIC(_MAJOR, _MINOR) (0) +# endif + // Re-enable sm_100f support in nvcc. # undef NV_HAS_FEATURE_SM_100f # define NV_HAS_FEATURE_SM_100f __NV_HAS_FEATURE_SM_100f -# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1000) +# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(10, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100f 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100f 0 @@ -522,7 +536,7 @@ // Re-enable sm_103f support in nvcc. # undef NV_HAS_FEATURE_SM_103f # define NV_HAS_FEATURE_SM_103f __NV_HAS_FEATURE_SM_103f -# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1030) +# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(10, 3) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103f 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_103f 0 @@ -531,7 +545,7 @@ // Re-enable sm_110f support in nvcc. # undef NV_HAS_FEATURE_SM_110f # define NV_HAS_FEATURE_SM_110f __NV_HAS_FEATURE_SM_110f -# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1100) +# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(11, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110f 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_110f 0 @@ -540,7 +554,7 @@ // Re-enable sm_120f support in nvcc. # undef NV_HAS_FEATURE_SM_120f # define NV_HAS_FEATURE_SM_120f __NV_HAS_FEATURE_SM_120f -# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1200) +# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(12, 0) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120f 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_120f 0 @@ -549,7 +563,7 @@ // Re-enable sm_121f support in nvcc. # undef NV_HAS_FEATURE_SM_121f # define NV_HAS_FEATURE_SM_121f __NV_HAS_FEATURE_SM_121f -# if defined(__CUDA_ARCH_FAMILY_SPECIFIC__) && (__CUDA_ARCH_FAMILY_SPECIFIC__ == 1210) +# if _NV_TARGET_CHECK_FAMILY_SPECIFIC(12, 1) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121f 1 # else # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_121f 0 diff --git a/libcudacxx/test/nvtarget/CMakeLists.txt b/libcudacxx/test/nvtarget/CMakeLists.txt index b85e589086d..90e374f59a4 100644 --- a/libcudacxx/test/nvtarget/CMakeLists.txt +++ b/libcudacxx/test/nvtarget/CMakeLists.txt @@ -1,48 +1,5 @@ -# This unit just checks that nv/target can be used with the specified dialects. - -set(nvtarget_dialects 11 14 17 20) - -set_directory_properties( - PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include" -) - add_custom_target(libcudacxx.test.nvtarget) -foreach (dialect ${nvtarget_dialects}) - if ("cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES) - add_library( - "libcudacxx.test.nvtarget.cpp${dialect}" - OBJECT - nvtargettest.cpp - ) - # This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient. - set_target_properties( - "libcudacxx.test.nvtarget.cpp${dialect}" - PROPERTIES CXX_STANDARD "${dialect}" CXX_STANDARD_REQUIRED ON - ) - add_dependencies( - libcudacxx.test.nvtarget - "libcudacxx.test.nvtarget.cpp${dialect}" - ) - endif() - - if ( - "cuda_std_${dialect}" IN_LIST CMAKE_CUDA_COMPILE_FEATURES - AND "cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES - ) - add_library( - "libcudacxx.test.nvtarget.cuda${dialect}" - OBJECT - nvtargettest.cu - ) - # This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient. - set_target_properties( - "libcudacxx.test.nvtarget.cuda${dialect}" - PROPERTIES CUDA_STANDARD "${dialect}" CUDA_STANDARD_REQUIRED ON - ) - add_dependencies( - libcudacxx.test.nvtarget - "libcudacxx.test.nvtarget.cuda${dialect}" - ) - endif() -endforeach() +add_subdirectory(arch_specific) +add_subdirectory(dialect) +add_subdirectory(family_specific) diff --git a/libcudacxx/test/nvtarget/arch_specific/CMakeLists.txt b/libcudacxx/test/nvtarget/arch_specific/CMakeLists.txt new file mode 100644 index 00000000000..1e2fb1253f9 --- /dev/null +++ b/libcudacxx/test/nvtarget/arch_specific/CMakeLists.txt @@ -0,0 +1,20 @@ +if (NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA") + return() +endif() + +# arch-specific features are supported since 12.9 +if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.9") + return() +endif() + +set_directory_properties( + PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include" +) + +set(target_name "libcudacxx.test.nvtarget.arch_specific") +add_library(${target_name} OBJECT arch_specific.cu) +set_target_properties( + ${target_name} + PROPERTIES CUDA_ARCHITECTURES "103a-virtual" +) +add_dependencies(libcudacxx.test.nvtarget ${target_name}) diff --git a/libcudacxx/test/nvtarget/arch_specific/arch_specific.cu b/libcudacxx/test/nvtarget/arch_specific/arch_specific.cu new file mode 100644 index 00000000000..8b51a984766 --- /dev/null +++ b/libcudacxx/test/nvtarget/arch_specific/arch_specific.cu @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// This test checks if arch-specific NV target macros work properly. + +#include + +// Currently, nvcc is the only compiler that supports arch-specific architectures. +#if !defined(__NVCC__) +# error "This test works with nvcc only." +#endif // !__NVCC__ + +#if defined(__CUDA_ARCH__) +# if __CUDA_ARCH_SPECIFIC__ != 1030 +# error "This test must be compiled for sm_103a target." +# endif // __CUDA_ARCH_SPECIFIC__ != 1030 +#endif // __CUDA_ARCH__ + +#define CHECK_TRUE(_PRED) \ + do \ + { \ + NV_IF_ELSE_TARGET(_PRED, static_assert(true);, static_assert(false);) \ + } while (0) +#define CHECK_FALSE(_PRED) \ + do \ + { \ + NV_IF_ELSE_TARGET(_PRED, static_assert(false);, static_assert(true);) \ + } while (0) + +__host__ __device__ void fn() +{ +#if defined(__CUDA_ARCH__) + CHECK_TRUE(NV_IS_EXACTLY_SM_103); + + CHECK_TRUE(NV_HAS_FEATURE_SM_103a); + + CHECK_TRUE(NV_HAS_FEATURE_SM_100f); + CHECK_TRUE(NV_HAS_FEATURE_SM_103f); +#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv + CHECK_TRUE(NV_IS_HOST); + + CHECK_FALSE(NV_HAS_FEATURE_SM_103a); + + CHECK_FALSE(NV_HAS_FEATURE_SM_100f); + CHECK_FALSE(NV_HAS_FEATURE_SM_103f); +#endif // ^^^ !__CUDA_ARCH__ ^^^ + + CHECK_FALSE(NV_HAS_FEATURE_SM_100a); + CHECK_FALSE(NV_HAS_FEATURE_SM_110a); + + CHECK_FALSE(NV_HAS_FEATURE_SM_110f); +} diff --git a/libcudacxx/test/nvtarget/dialect/CMakeLists.txt b/libcudacxx/test/nvtarget/dialect/CMakeLists.txt new file mode 100644 index 00000000000..0ab6fae2d5f --- /dev/null +++ b/libcudacxx/test/nvtarget/dialect/CMakeLists.txt @@ -0,0 +1,47 @@ +# This unit just checks that nv/target can be used with the specified dialects. + +set(nvtarget_dialects 11 14 17 20) + +# cudafe++ 12.9 has problem compiling some parts of when using libstdc++ 14, so let's just filter out +# dialect version 11. +if ( + "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA" + AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "12.9" + AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "13.0" + AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" + AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL "14.0" + AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "15.0" +) + list(FILTER nvtarget_dialects EXCLUDE REGEX "^11$") +endif() + +set_directory_properties( + PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include" +) + +foreach (dialect ${nvtarget_dialects}) + if ("cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES) + set(target_name "libcudacxx.test.nvtarget.dialect.cpp${dialect}") + add_library(${target_name} OBJECT nvtargettest.cpp) + # This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient. + set_target_properties( + ${target_name} + PROPERTIES CXX_STANDARD "${dialect}" CXX_STANDARD_REQUIRED ON + ) + add_dependencies(libcudacxx.test.nvtarget ${target_name}) + endif() + + if ( + "cuda_std_${dialect}" IN_LIST CMAKE_CUDA_COMPILE_FEATURES + AND "cxx_std_${dialect}" IN_LIST CMAKE_CXX_COMPILE_FEATURES + ) + set(target_name "libcudacxx.test.nvtarget.dialect.cuda${dialect}") + add_library(${target_name} OBJECT nvtargettest.cu) + # This is required to override CMAKE_CXX_STANDARD - target_compile_features is insufficient. + set_target_properties( + ${target_name} + PROPERTIES CUDA_STANDARD "${dialect}" CUDA_STANDARD_REQUIRED ON + ) + add_dependencies(libcudacxx.test.nvtarget ${target_name}) + endif() +endforeach() diff --git a/libcudacxx/test/nvtarget/dialect/nvtargettest.cpp b/libcudacxx/test/nvtarget/dialect/nvtargettest.cpp new file mode 100644 index 00000000000..e985cd6cffc --- /dev/null +++ b/libcudacxx/test/nvtarget/dialect/nvtargettest.cpp @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +void fn() {} diff --git a/libcudacxx/test/nvtarget/dialect/nvtargettest.cu b/libcudacxx/test/nvtarget/dialect/nvtargettest.cu new file mode 100644 index 00000000000..6b4599f610d --- /dev/null +++ b/libcudacxx/test/nvtarget/dialect/nvtargettest.cu @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +__host__ __device__ void fn() {} diff --git a/libcudacxx/test/nvtarget/family_specific/CMakeLists.txt b/libcudacxx/test/nvtarget/family_specific/CMakeLists.txt new file mode 100644 index 00000000000..0934f24001f --- /dev/null +++ b/libcudacxx/test/nvtarget/family_specific/CMakeLists.txt @@ -0,0 +1,20 @@ +if (NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVIDIA") + return() +endif() + +# family-specific features are supported since 12.9 +if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.9") + return() +endif() + +set_directory_properties( + PROPERTIES INCLUDE_DIRECTORIES "${libcudacxx_SOURCE_DIR}/include" +) + +set(target_name "libcudacxx.test.nvtarget.family_specific") +add_library(${target_name} OBJECT family_specific.cu) +set_target_properties( + ${target_name} + PROPERTIES CUDA_ARCHITECTURES "103f-virtual" +) +add_dependencies(libcudacxx.test.nvtarget ${target_name}) diff --git a/libcudacxx/test/nvtarget/family_specific/family_specific.cu b/libcudacxx/test/nvtarget/family_specific/family_specific.cu new file mode 100644 index 00000000000..79253d69c3b --- /dev/null +++ b/libcudacxx/test/nvtarget/family_specific/family_specific.cu @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// This test checks if family-specific NV target macros work properly. + +#include + +// Currently, nvcc is the only compiler that supports arch-specific architectures. +#if !defined(__NVCC__) +# error "This test works with nvcc only." +#endif // !__NVCC__ + +#if defined(__CUDA_ARCH__) +# if __CUDA_ARCH_FAMILY_SPECIFIC__ != 1030 +# error "This test must be compiled for sm_103f target." +# endif // __CUDA_ARCH_FAMILY_SPECIFIC__ != 1030 +#endif // __CUDA_ARCH__ + +#define CHECK_TRUE(_PRED) \ + do \ + { \ + NV_IF_ELSE_TARGET(_PRED, static_assert(true);, static_assert(false);) \ + } while (0) +#define CHECK_FALSE(_PRED) \ + do \ + { \ + NV_IF_ELSE_TARGET(_PRED, static_assert(false);, static_assert(true);) \ + } while (0) + +__host__ __device__ void fn() +{ +#if defined(__CUDA_ARCH__) + CHECK_TRUE(NV_IS_EXACTLY_SM_103); + + CHECK_TRUE(NV_HAS_FEATURE_SM_100f); + CHECK_TRUE(NV_HAS_FEATURE_SM_103f); +#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv + CHECK_TRUE(NV_IS_HOST); + + CHECK_FALSE(NV_HAS_FEATURE_SM_100f); + CHECK_FALSE(NV_HAS_FEATURE_SM_103f); +#endif // ^^^ !__CUDA_ARCH__ ^^^ + + CHECK_FALSE(NV_HAS_FEATURE_SM_110f); + + CHECK_FALSE(NV_HAS_FEATURE_SM_100a); + CHECK_FALSE(NV_HAS_FEATURE_SM_103a); + CHECK_FALSE(NV_HAS_FEATURE_SM_110a); +} diff --git a/libcudacxx/test/nvtarget/nvtargettest.cpp b/libcudacxx/test/nvtarget/nvtargettest.cpp deleted file mode 100644 index 6f47369479e..00000000000 --- a/libcudacxx/test/nvtarget/nvtargettest.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int main() -{ - return 0; -} diff --git a/libcudacxx/test/nvtarget/nvtargettest.cu b/libcudacxx/test/nvtarget/nvtargettest.cu deleted file mode 100644 index 6f47369479e..00000000000 --- a/libcudacxx/test/nvtarget/nvtargettest.cu +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int main() -{ - return 0; -}