Skip to content

[HLSL] move rcp to cgbuiltins #88401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18303,9 +18303,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("rcp operand must have a float representation");
return Builder.CreateIntrinsic(
/*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
llvm::Type *Ty = Op0->getType();
llvm::Type *EltTy = Ty->getScalarType();
Constant *One =
Ty->isVectorTy()
? ConstantVector::getSplat(
ElementCount::getFixed(
dyn_cast<FixedVectorType>(Ty)->getNumElements()),
ConstantFP::get(EltTy, 1.0))
: ConstantFP::get(EltTy, 1.0);
return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
}
case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Expand Down
127 changes: 88 additions & 39 deletions clang/test/CodeGenHLSL/builtins/rcp.hlsl
Original file line number Diff line number Diff line change
@@ -1,53 +1,102 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF

// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %dx.rcp = call half @llvm.dx.rcp.f16(
// NATIVE_HALF: ret half %dx.rcp
// NO_HALF: define noundef float @"?test_rcp_half@@YA$halff@$halff@@Z"(
// NO_HALF: %dx.rcp = call float @llvm.dx.rcp.f32(
// NO_HALF: ret float %dx.rcp
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK

// DXIL_NATIVE_HALF: define noundef half @
// SPIR_NATIVE_HALF: define spir_func noundef half @
// NATIVE_HALF: %hlsl.rcp = fdiv half 0xH3C00, %{{.*}}
// NATIVE_HALF: ret half %hlsl.rcp
// DXIL_NO_HALF: define noundef float @
// SPIR_NO_HALF: define spir_func noundef float @
// NO_HALF: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
// NO_HALF: ret float %hlsl.rcp
// Scalar half case: returns the rcp builtin applied to p0.
half test_rcp_half(half p0) {
  return rcp(p0);
}
// NATIVE_HALF: define noundef <2 x half> @
// NATIVE_HALF: %dx.rcp = call <2 x half> @llvm.dx.rcp.v2f16
// NATIVE_HALF: ret <2 x half> %dx.rcp
// NO_HALF: define noundef <2 x float> @
// NO_HALF: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32(
// NO_HALF: ret <2 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <2 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <2 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <2 x float> @
// SPIR_NO_HALF: define spir_func noundef <2 x float> @
// NO_HALF: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <2 x float> %hlsl.rcp
// Two-element half vector case: returns the rcp builtin applied to p0.
half2 test_rcp_half2(half2 p0) {
  return rcp(p0);
}
// NATIVE_HALF: define noundef <3 x half> @
// NATIVE_HALF: %dx.rcp = call <3 x half> @llvm.dx.rcp.v3f16
// NATIVE_HALF: ret <3 x half> %dx.rcp
// NO_HALF: define noundef <3 x float> @
// NO_HALF: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32(
// NO_HALF: ret <3 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <3 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <3 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <3 x float> @
// SPIR_NO_HALF: define spir_func noundef <3 x float> @
// NO_HALF: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <3 x float> %hlsl.rcp
// Three-element half vector case: returns the rcp builtin applied to p0.
half3 test_rcp_half3(half3 p0) {
  return rcp(p0);
}
// NATIVE_HALF: define noundef <4 x half> @
// NATIVE_HALF: %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16
// NATIVE_HALF: ret <4 x half> %dx.rcp
// NO_HALF: define noundef <4 x float> @
// NO_HALF: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(
// NO_HALF: ret <4 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <4 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <4 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <4 x float> @
// SPIR_NO_HALF: define spir_func noundef <4 x float> @
// NO_HALF: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <4 x float> %hlsl.rcp
// Four-element half vector case: returns the rcp builtin applied to p0.
half4 test_rcp_half4(half4 p0) {
  return rcp(p0);
}

// CHECK: define noundef float @
// CHECK: %dx.rcp = call float @llvm.dx.rcp.f32(
// CHECK: ret float %dx.rcp
// DXIL_CHECK: define noundef float @
// SPIR_CHECK: define spir_func noundef float @
// CHECK: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
// CHECK: ret float %hlsl.rcp
// Scalar float case: returns the rcp builtin applied to p0.
float test_rcp_float(float p0) {
  return rcp(p0);
}
// CHECK: define noundef <2 x float> @
// CHECK: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32
// CHECK: ret <2 x float> %dx.rcp

// DXIL_CHECK: define noundef <2 x float> @
// SPIR_CHECK: define spir_func noundef <2 x float> @
// CHECK: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <2 x float> %hlsl.rcp
// Two-element float vector case: returns the rcp builtin applied to p0.
float2 test_rcp_float2(float2 p0) {
  return rcp(p0);
}
// CHECK: define noundef <3 x float> @
// CHECK: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32
// CHECK: ret <3 x float> %dx.rcp

// DXIL_CHECK: define noundef <3 x float> @
// SPIR_CHECK: define spir_func noundef <3 x float> @
// CHECK: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <3 x float> %hlsl.rcp
// Three-element float vector case: returns the rcp builtin applied to p0.
float3 test_rcp_float3(float3 p0) {
  return rcp(p0);
}
// CHECK: define noundef <4 x float> @
// CHECK: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32
// CHECK: ret <4 x float> %dx.rcp

// DXIL_CHECK: define noundef <4 x float> @
// SPIR_CHECK: define spir_func noundef <4 x float> @
// CHECK: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <4 x float> %hlsl.rcp
// Four-element float vector case: returns the rcp builtin applied to p0.
float4 test_rcp_float4(float4 p0) {
  return rcp(p0);
}

// DXIL_CHECK: define noundef double @
// SPIR_CHECK: define spir_func noundef double @
// CHECK: %hlsl.rcp = fdiv double 1.000000e+00, %{{.*}}
// CHECK: ret double %hlsl.rcp
// Scalar double case: returns the rcp builtin applied to p0.
double test_rcp_double(double p0) {
  return rcp(p0);
}

// DXIL_CHECK: define noundef <2 x double> @
// SPIR_CHECK: define spir_func noundef <2 x double> @
// CHECK: %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <2 x double> %hlsl.rcp
// Two-element double vector case: returns the rcp builtin applied to p0.
double2 test_rcp_double2(double2 p0) {
  return rcp(p0);
}

// DXIL_CHECK: define noundef <3 x double> @
// SPIR_CHECK: define spir_func noundef <3 x double> @
// CHECK: %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <3 x double> %hlsl.rcp
// Three-element double vector case: returns the rcp builtin applied to p0.
double3 test_rcp_double3(double3 p0) {
  return rcp(p0);
}

// DXIL_CHECK: define noundef <4 x double> @
// SPIR_CHECK: define spir_func noundef <4 x double> @
// CHECK: %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <4 x double> %hlsl.rcp
// Four-element double vector case: returns the rcp builtin applied to p0.
double4 test_rcp_double4(double4 p0) {
  return rcp(p0);
}
22 changes: 0 additions & 22 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_clamp:
case Intrinsic::dx_uclamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return true;
Expand Down Expand Up @@ -218,25 +217,6 @@ static bool expandPowIntrinsic(CallInst *Orig) {
return true;
}

/// Expands a reciprocal intrinsic call into an explicit floating-point
/// division `1.0 / X`, where X is the call's first operand.
///
/// \param Orig the intrinsic call to replace. All of its uses are redirected
///        to the new `fdiv` and the call itself is erased.
/// \returns true unconditionally.
static bool expandRcpIntrinsic(CallInst *Orig) {
  Value *X = Orig->getOperand(0);
  // Insert the replacement immediately before the call being expanded.
  IRBuilder<> Builder(Orig);
  // ConstantFP::get(Type*, double) yields a scalar constant for scalar types
  // and a splat for vector types, so no manual vector handling (and no
  // unchecked dyn_cast<FixedVectorType> dereference) is needed.
  Constant *One = ConstantFP::get(X->getType(), 1.0);
  auto *Result = Builder.CreateFDiv(One, X, "dx.rcp");
  Orig->replaceAllUsesWith(Result);
  Orig->eraseFromParent();
  return true;
}

static Intrinsic::ID getMaxForClamp(Type *ElemTy,
Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
Expand Down Expand Up @@ -300,8 +280,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
return expandClampIntrinsic(Orig, F.getIntrinsicID());
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
return expandRcpIntrinsic(Orig);
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return expandIntegerDot(Orig, F.getIntrinsicID());
Expand Down
52 changes: 0 additions & 52 deletions llvm/test/CodeGen/DirectX/rcp.ll

This file was deleted.

126 changes: 126 additions & 0 deletions llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s

; Verifies that the `fdiv 1.0, x` form used for the HLSL rcp builtin lowers to
; OpFDiv for half/float/double scalars and for 2-, 3- and 4-element vectors.
;
; Each numeric variable is captured exactly once with `[[#name:]]` and is
; referenced afterwards as `[[#name]]`. A trailing colon on a reference would
; re-capture the variable and accept any id, so the type and constant operands
; would not actually be verified.

; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
; CHECK-DAG: %[[#vec2_float_16:]] = OpTypeVector %[[#float_16]] 2
; CHECK-DAG: %[[#vec2_float_32:]] = OpTypeVector %[[#float_32]] 2
; CHECK-DAG: %[[#vec2_float_64:]] = OpTypeVector %[[#float_64]] 2
; CHECK-DAG: %[[#vec3_float_16:]] = OpTypeVector %[[#float_16]] 3
; CHECK-DAG: %[[#vec3_float_32:]] = OpTypeVector %[[#float_32]] 3
; CHECK-DAG: %[[#vec3_float_64:]] = OpTypeVector %[[#float_64]] 3
; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4
; CHECK-DAG: %[[#const_f64_1:]] = OpConstant %[[#float_64]] 1
; CHECK-DAG: %[[#const_f32_1:]] = OpConstant %[[#float_32]] 1
; CHECK-DAG: %[[#const_f16_1:]] = OpConstant %[[#float_16]] 1

; CHECK-DAG: %[[#vec2_const_ones_f16:]] = OpConstantComposite %[[#vec2_float_16]] %[[#const_f16_1]] %[[#const_f16_1]]
; CHECK-DAG: %[[#vec3_const_ones_f16:]] = OpConstantComposite %[[#vec3_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]
; CHECK-DAG: %[[#vec4_const_ones_f16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]

; CHECK-DAG: %[[#vec2_const_ones_f32:]] = OpConstantComposite %[[#vec2_float_32]] %[[#const_f32_1]] %[[#const_f32_1]]
; CHECK-DAG: %[[#vec3_const_ones_f32:]] = OpConstantComposite %[[#vec3_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]
; CHECK-DAG: %[[#vec4_const_ones_f32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]

; CHECK-DAG: %[[#vec2_const_ones_f64:]] = OpConstantComposite %[[#vec2_float_64]] %[[#const_f64_1]] %[[#const_f64_1]]
; CHECK-DAG: %[[#vec3_const_ones_f64:]] = OpConstantComposite %[[#vec3_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]
; CHECK-DAG: %[[#vec4_const_ones_f64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]


define spir_func noundef half @test_rcp_half(half noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]]
  ; CHECK: OpFDiv %[[#float_16]] %[[#const_f16_1]] %[[#arg0]]
  %hlsl.rcp = fdiv half 0xH3C00, %p0
  ret half %hlsl.rcp
}

define spir_func noundef <2 x half> @test_rcp_half2(<2 x half> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_16]]
  ; CHECK: OpFDiv %[[#vec2_float_16]] %[[#vec2_const_ones_f16]] %[[#arg0]]
  %hlsl.rcp = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, %p0
  ret <2 x half> %hlsl.rcp
}

define spir_func noundef <3 x half> @test_rcp_half3(<3 x half> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
  ; CHECK: OpFDiv %[[#vec3_float_16]] %[[#vec3_const_ones_f16]] %[[#arg0]]
  %hlsl.rcp = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
  ret <3 x half> %hlsl.rcp
}

define spir_func noundef <4 x half> @test_rcp_half4(<4 x half> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
  ; CHECK: OpFDiv %[[#vec4_float_16]] %[[#vec4_const_ones_f16]] %[[#arg0]]
  %hlsl.rcp = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
  ret <4 x half> %hlsl.rcp
}

define spir_func noundef float @test_rcp_float(float noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]]
  ; CHECK: OpFDiv %[[#float_32]] %[[#const_f32_1]] %[[#arg0]]
  %hlsl.rcp = fdiv float 1.000000e+00, %p0
  ret float %hlsl.rcp
}

define spir_func noundef <2 x float> @test_rcp_float2(<2 x float> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_32]]
  ; CHECK: OpFDiv %[[#vec2_float_32]] %[[#vec2_const_ones_f32]] %[[#arg0]]
  %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %p0
  ret <2 x float> %hlsl.rcp
}

define spir_func noundef <3 x float> @test_rcp_float3(<3 x float> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
  ; CHECK: OpFDiv %[[#vec3_float_32]] %[[#vec3_const_ones_f32]] %[[#arg0]]
  %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
  ret <3 x float> %hlsl.rcp
}

define spir_func noundef <4 x float> @test_rcp_float4(<4 x float> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
  ; CHECK: OpFDiv %[[#vec4_float_32]] %[[#vec4_const_ones_f32]] %[[#arg0]]
  %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
  ret <4 x float> %hlsl.rcp
}

define spir_func noundef double @test_rcp_double(double noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_64]]
  ; CHECK: OpFDiv %[[#float_64]] %[[#const_f64_1]] %[[#arg0]]
  %hlsl.rcp = fdiv double 1.000000e+00, %p0
  ret double %hlsl.rcp
}

define spir_func noundef <2 x double> @test_rcp_double2(<2 x double> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_64]]
  ; CHECK: OpFDiv %[[#vec2_float_64]] %[[#vec2_const_ones_f64]] %[[#arg0]]
  %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %p0
  ret <2 x double> %hlsl.rcp
}

define spir_func noundef <3 x double> @test_rcp_double3(<3 x double> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_64]]
  ; CHECK: OpFDiv %[[#vec3_float_64]] %[[#vec3_const_ones_f64]] %[[#arg0]]
  %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
  ret <3 x double> %hlsl.rcp
}

define spir_func noundef <4 x double> @test_rcp_double4(<4 x double> noundef %p0) #0 {
entry:
  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_64]]
  ; CHECK: OpFDiv %[[#vec4_float_64]] %[[#vec4_const_ones_f64]] %[[#arg0]]
  %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
  ret <4 x double> %hlsl.rcp
}