Skip to content

Commit eb1a4a6

Browse files
author
Jenkins
committed
merge main into amd-staging
Change-Id: I920e3c6518a78e762ce6b770d7e8c701e41593dc
2 parents 9548705 + e6549b8 commit eb1a4a6

File tree

36 files changed

+741
-690
lines changed

36 files changed

+741
-690
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4824,6 +4824,12 @@ def HLSLStep: LangBuiltin<"HLSL_LANG"> {
48244824
let Prototype = "void(...)";
48254825
}
48264826

4827+
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
4828+
let Spellings = ["__builtin_hlsl_elementwise_radians"];
4829+
let Attributes = [NoThrow, Const];
4830+
let Prototype = "void(...)";
4831+
}
4832+
48274833
// Builtins for XRay.
48284834
def XRayCustomEvent : Builtin {
48294835
let Spellings = ["__xray_customevent"];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18896,6 +18896,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1889618896
retType, CGM.getHLSLRuntime().getSignIntrinsic(),
1889718897
ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
1889818898
}
18899+
case Builtin::BI__builtin_hlsl_elementwise_radians: {
18900+
Value *Op0 = EmitScalarExpr(E->getArg(0));
18901+
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18902+
"radians operand must have a float representation");
18903+
return Builder.CreateIntrinsic(
18904+
/*ReturnType=*/Op0->getType(),
18905+
CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
18906+
nullptr, "hlsl.radians");
18907+
}
1889918908
}
1890018909
return nullptr;
1890118910
}

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class CGHLSLRuntime {
8383
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
8484
GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
8585
GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
86+
GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
8687
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
8788
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
8889
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)

clang/lib/Format/TokenAnnotator.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3738,6 +3738,13 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
37383738

37393739
const auto *Prev = Current.getPreviousNonComment();
37403740
assert(Prev);
3741+
3742+
if (Prev->is(tok::coloncolon))
3743+
Prev = Prev->Previous;
3744+
3745+
if (!Prev)
3746+
return false;
3747+
37413748
const auto &Previous = *Prev;
37423749

37433750
if (const auto *PrevPrev = Previous.getPreviousNonComment();

clang/lib/Format/UnwrappedLineParser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,6 +2131,11 @@ void UnwrappedLineParser::parseStructuralElement(
21312131
return;
21322132
}
21332133
break;
2134+
case tok::greater:
2135+
nextToken();
2136+
if (FormatTok->is(tok::l_brace))
2137+
FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2138+
break;
21342139
default:
21352140
nextToken();
21362141
break;

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2138,5 +2138,35 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
21382138
int3 sign(double3);
21392139
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
21402140
int4 sign(double4);
2141+
2142+
//===----------------------------------------------------------------------===//
2143+
// radians builtins
2144+
//===----------------------------------------------------------------------===//
2145+
2146+
/// \fn T radians(T Val)
2147+
/// \brief Converts the specified value from degrees to radians.
2148+
2149+
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
2150+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2151+
half radians(half);
2152+
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
2153+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2154+
half2 radians(half2);
2155+
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
2156+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2157+
half3 radians(half3);
2158+
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
2159+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2160+
half4 radians(half4);
2161+
2162+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2163+
float radians(float);
2164+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2165+
float2 radians(float2);
2166+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2167+
float3 radians(float3);
2168+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
2169+
float4 radians(float4);
2170+
21412171
} // namespace hlsl
21422172
#endif //_HLSL_HLSL_INTRINSICS_H_

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,6 +1896,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
18961896
return true;
18971897
break;
18981898
}
1899+
case Builtin::BI__builtin_hlsl_elementwise_radians:
18991900
case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
19001901
case Builtin::BI__builtin_hlsl_elementwise_frac: {
19011902
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
4+
// RUN: --check-prefixes=CHECK,NATIVE_HALF \
5+
// RUN: -DTARGET=dx -DFNATTRS=noundef
6+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
7+
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
8+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
9+
// RUN: -DTARGET=dx -DFNATTRS=noundef
10+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
11+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
12+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
13+
// RUN: --check-prefixes=CHECK,NATIVE_HALF \
14+
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
15+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
16+
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
17+
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
18+
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
19+
20+
21+
// NATIVE_HALF: define [[FNATTRS]] half @
22+
// NATIVE_HALF: %{{.*}} = call half @llvm.[[TARGET]].radians.f16(
23+
// NATIVE_HALF: ret half %{{.*}}
24+
// NO_HALF: define [[FNATTRS]] float @
25+
// NO_HALF: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
26+
// NO_HALF: ret float %{{.*}}
27+
half test_radians_half(half p0) { return radians(p0); }
28+
// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
29+
// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.[[TARGET]].radians.v2f16
30+
// NATIVE_HALF: ret <2 x half> %{{.*}}
31+
// NO_HALF: define [[FNATTRS]] <2 x float> @
32+
// NO_HALF: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32(
33+
// NO_HALF: ret <2 x float> %{{.*}}
34+
half2 test_radians_half2(half2 p0) { return radians(p0); }
35+
// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
36+
// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.[[TARGET]].radians.v3f16
37+
// NATIVE_HALF: ret <3 x half> %{{.*}}
38+
// NO_HALF: define [[FNATTRS]] <3 x float> @
39+
// NO_HALF: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32(
40+
// NO_HALF: ret <3 x float> %{{.*}}
41+
half3 test_radians_half3(half3 p0) { return radians(p0); }
42+
// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
43+
// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.[[TARGET]].radians.v4f16
44+
// NATIVE_HALF: ret <4 x half> %{{.*}}
45+
// NO_HALF: define [[FNATTRS]] <4 x float> @
46+
// NO_HALF: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32(
47+
// NO_HALF: ret <4 x float> %{{.*}}
48+
half4 test_radians_half4(half4 p0) { return radians(p0); }
49+
50+
// CHECK: define [[FNATTRS]] float @
51+
// CHECK: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
52+
// CHECK: ret float %{{.*}}
53+
float test_radians_float(float p0) { return radians(p0); }
54+
// CHECK: define [[FNATTRS]] <2 x float> @
55+
// CHECK: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32
56+
// CHECK: ret <2 x float> %{{.*}}
57+
float2 test_radians_float2(float2 p0) { return radians(p0); }
58+
// CHECK: define [[FNATTRS]] <3 x float> @
59+
// CHECK: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32
60+
// CHECK: ret <3 x float> %{{.*}}
61+
float3 test_radians_float3(float3 p0) { return radians(p0); }
62+
// CHECK: define [[FNATTRS]] <4 x float> @
63+
// CHECK: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32
64+
// CHECK: ret <4 x float> %{{.*}}
65+
float4 test_radians_float4(float4 p0) { return radians(p0); }
66+

clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
1818
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tanh
1919
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
20+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_hlsl_elementwise_radians
2021

2122
double test_double_builtin(double p0) {
2223
return TEST_FUNC(p0);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
float test_too_few_arg() {
4+
return __builtin_hlsl_elementwise_radians();
5+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
6+
}
7+
8+
float2 test_too_many_arg(float2 p0) {
9+
return __builtin_hlsl_elementwise_radians(p0, p0);
10+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
11+
}
12+
13+
float builtin_bool_to_float_type_promotion(bool p1) {
14+
return __builtin_hlsl_elementwise_radians(p1);
15+
// expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
16+
}
17+
18+
float builtin_radians_int_to_float_promotion(int p1) {
19+
return __builtin_hlsl_elementwise_radians(p1);
20+
// expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
21+
}
22+
23+
float2 builtin_radians_int2_to_float2_promotion(int2 p1) {
24+
return __builtin_hlsl_elementwise_radians(p1);
25+
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
26+
}
27+

clang/unittests/Format/TokenAnnotatorTest.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,6 +1007,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) {
10071007
EXPECT_TOKEN(Tokens[6], tok::r_paren, TT_OverloadedOperator);
10081008
EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen);
10091009
EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);
1010+
1011+
Tokens = annotate("friend ostream& ::operator<<(ostream& lhs, foo& rhs);");
1012+
ASSERT_EQ(Tokens.size(), 17u) << Tokens;
1013+
EXPECT_TOKEN(Tokens[4], tok::kw_operator, TT_FunctionDeclarationName);
1014+
EXPECT_TOKEN(Tokens[5], tok::lessless, TT_OverloadedOperator);
1015+
EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_OverloadedOperatorLParen);
1016+
EXPECT_TOKEN(Tokens[8], tok::amp, TT_PointerOrReference);
1017+
EXPECT_TOKEN(Tokens[12], tok::amp, TT_PointerOrReference);
10101018
}
10111019

10121020
TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {
@@ -3546,6 +3554,11 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
35463554
ASSERT_EQ(Tokens.size(), 11u) << Tokens;
35473555
EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
35483556
EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
3557+
3558+
Tokens = annotate("return std::conditional_t<T::value == U::value, T, U>{};");
3559+
ASSERT_EQ(Tokens.size(), 21u) << Tokens;
3560+
EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
3561+
EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser);
35493562
}
35503563

35513564
} // namespace

flang/include/flang/Runtime/CUDA/common.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#ifndef FORTRAN_RUNTIME_CUDA_COMMON_H_
1010
#define FORTRAN_RUNTIME_CUDA_COMMON_H_
1111

12-
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
1312
#include "flang/Runtime/descriptor.h"
1413
#include "flang/Runtime/entry-names.h"
1514

@@ -35,16 +34,4 @@ static constexpr unsigned kDeviceToDevice = 2;
3534
terminator.Crash("'%s' failed with '%s'", #expr, name); \
3635
}(expr)
3736

38-
static inline unsigned getMemType(cuf::DataAttribute attr) {
39-
if (attr == cuf::DataAttribute::Device)
40-
return kMemTypeDevice;
41-
if (attr == cuf::DataAttribute::Managed)
42-
return kMemTypeManaged;
43-
if (attr == cuf::DataAttribute::Unified)
44-
return kMemTypeUnified;
45-
if (attr == cuf::DataAttribute::Pinned)
46-
return kMemTypePinned;
47-
llvm::report_fatal_error("unsupported memory type");
48-
}
49-
5037
#endif // FORTRAN_RUNTIME_CUDA_COMMON_H_

flang/lib/Optimizer/Transforms/CufOpConversion.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,18 @@ using namespace Fortran::runtime::cuda;
3636

3737
namespace {
3838

39+
static inline unsigned getMemType(cuf::DataAttribute attr) {
40+
if (attr == cuf::DataAttribute::Device)
41+
return kMemTypeDevice;
42+
if (attr == cuf::DataAttribute::Managed)
43+
return kMemTypeManaged;
44+
if (attr == cuf::DataAttribute::Unified)
45+
return kMemTypeUnified;
46+
if (attr == cuf::DataAttribute::Pinned)
47+
return kMemTypePinned;
48+
llvm::report_fatal_error("unsupported memory type");
49+
}
50+
3951
template <typename OpTy>
4052
static bool isPinned(OpTy op) {
4153
if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)

libc/docs/gpu/using.rst

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ described in the `clang documentation
3434
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
3535
through the ``--offload-new-driver`` and ``-fgpu-rdc`` flags.
3636

37-
In order or link the GPU runtime, we simply pass this library to the embedded
38-
device linker job. This can be done using the ``-Xoffload-linker`` option, which
39-
forwards an argument to a ``clang`` job used to create the final GPU executable.
40-
The toolchain should pick up the C libraries automatically in most cases, so
37+
In order to link the GPU runtime, we simply pass this library to the embedded
38+
device linker job. This can be done using the ``-Xoffload-linker`` option, which
39+
forwards an argument to a ``clang`` job used to create the final GPU executable.
40+
The toolchain should pick up the C libraries automatically in most cases, so
4141
this shouldn't be necessary.
4242

4343
.. code-block:: sh
@@ -189,7 +189,7 @@ final executable.
189189

190190
#include <stdio.h>
191191

192-
int main() { fputs("Hello from AMDGPU!\n", stdout); }
192+
int main() { printf("Hello from AMDGPU!\n"); }
193193

194194
This program can then be compiled using the ``clang`` compiler. Note that
195195
``-flto`` and ``-mcpu=`` should be defined. This is because the GPU
@@ -227,28 +227,26 @@ Building for NVPTX targets
227227
^^^^^^^^^^^^^^^^^^^^^^^^^^
228228

229229
The infrastructure is the same as the AMDGPU example. However, the NVPTX binary
230-
utilities are very limited and must be targeted directly. There is no linker
231-
support for static libraries so we need to link in the ``libc.bc`` bitcode and
232-
inform the compiler driver of the file's contents.
230+
utilities are very limited and must be targeted directly. A utility called
231+
``clang-nvlink-wrapper`` instead wraps around the standard link job to give the
232+
illusion that ``nvlink`` is a functional linker.
233233

234234
.. code-block:: c++
235235

236236
#include <stdio.h>
237237

238238
int main(int argc, char **argv, char **envp) {
239-
fputs("Hello from NVPTX!\n", stdout);
239+
printf("Hello from NVPTX!\n");
240240
}
241241
242242
Additionally, the NVPTX ABI requires that every function signature matches. This
243243
requires us to pass the full prototype from ``main``. The installation will
244244
contain the ``nvptx-loader`` utility if the CUDA driver was found during
245-
compilation.
245+
compilation. Using link time optimization will help hide this.
246246

247247
.. code-block:: sh
248248
249-
$> clang hello.c --target=nvptx64-nvidia-cuda -march=native \
250-
-x ir <install>/lib/nvptx64-nvidia-cuda/libc.bc \
251-
-x ir <install>/lib/nvptx64-nvidia-cuda/crt1.o
249+
$> clang hello.c --target=nvptx64-nvidia-cuda -mcpu=native -flto -lc <install>/lib/nvptx64-nvidia-cuda/crt1.o
252250
$> nvptx-loader --threads 2 --blocks 2 a.out
253251
Hello from NVPTX!
254252
Hello from NVPTX!

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,5 @@ def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
8686
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
8787
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
8888
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
89+
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
8990
}

0 commit comments

Comments
 (0)