[HLSL] move rcp to cgbuiltins (#88401)

farzonl · Farzon Lotfi · web-flow · commit 4036a6946e54 · 2024-04-11T18:26:25.000-04:00
Removing the intrinsic because there are no opcodes for rcp in DXIL or SPIR-V. Moving the lowering into CGBuiltin means we don't have to re-implement this feature for each backend. Fixes #87784. Co-authored-by: Farzon Lotfi <farzon@farzon.com>
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18303,9 +18303,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
       llvm_unreachable("rcp operand must have a float representation");
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
-        ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
+    // Neither DXIL nor SPIR-V has an rcp opcode, so emit 1.0 / x directly.
+    // ConstantFP::get(Type *, double) splats the value when given a vector
+    // type, so scalar and vector operands share one code path.
+    Constant *One = ConstantFP::get(Op0->getType(), 1.0);
+    return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
   }
   case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/test/CodeGenHLSL/builtins/rcp.hlsl b/clang/test/CodeGenHLSL/builtins/rcp.hlsl
@@ -1,53 +1,102 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN:   --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %dx.rcp = call half @llvm.dx.rcp.f16(
-// NATIVE_HALF: ret half %dx.rcp
-// NO_HALF: define noundef float @"?test_rcp_half@@YA$halff@$halff@@Z"(
-// NO_HALF: %dx.rcp = call float @llvm.dx.rcp.f32(
-// NO_HALF: ret float %dx.rcp
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK
+
+// DXIL_NATIVE_HALF: define noundef half @
+// SPIR_NATIVE_HALF: define spir_func noundef half @
+// NATIVE_HALF: %hlsl.rcp = fdiv half 0xH3C00, %{{.*}} 
+// NATIVE_HALF: ret half %hlsl.rcp
+// DXIL_NO_HALF: define noundef float @
+// SPIR_NO_HALF: define spir_func noundef float @
+// NO_HALF: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
+// NO_HALF: ret float %hlsl.rcp
 half test_rcp_half(half p0) { return rcp(p0); }
-// NATIVE_HALF: define noundef <2 x half> @
-// NATIVE_HALF: %dx.rcp = call <2 x half> @llvm.dx.rcp.v2f16
-// NATIVE_HALF: ret <2 x half> %dx.rcp
-// NO_HALF: define noundef <2 x float> @
-// NO_HALF: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32(
-// NO_HALF: ret <2 x float> %dx.rcp
+
+// DXIL_NATIVE_HALF: define noundef <2 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> <half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: ret <2 x half> %hlsl.rcp
+// DXIL_NO_HALF: define noundef <2 x float> @
+// SPIR_NO_HALF: define spir_func noundef <2 x float> @
+// NO_HALF: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: ret <2 x float> %hlsl.rcp
 half2 test_rcp_half2(half2 p0) { return rcp(p0); }
-// NATIVE_HALF: define noundef <3 x half> @
-// NATIVE_HALF: %dx.rcp = call <3 x half> @llvm.dx.rcp.v3f16
-// NATIVE_HALF: ret <3 x half> %dx.rcp
-// NO_HALF: define noundef <3 x float> @
-// NO_HALF: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32(
-// NO_HALF: ret <3 x float> %dx.rcp
+
+// DXIL_NATIVE_HALF: define noundef <3 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> <half  0xH3C00, half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: ret <3 x half> %hlsl.rcp
+// DXIL_NO_HALF: define noundef <3 x float> @
+// SPIR_NO_HALF: define spir_func noundef <3 x float> @
+// NO_HALF: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: ret <3 x float> %hlsl.rcp
 half3 test_rcp_half3(half3 p0) { return rcp(p0); }
-// NATIVE_HALF: define noundef <4 x half> @
-// NATIVE_HALF: %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16
-// NATIVE_HALF: ret <4 x half> %dx.rcp
-// NO_HALF: define noundef <4 x float> @
-// NO_HALF: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(
-// NO_HALF: ret <4 x float> %dx.rcp
+
+// DXIL_NATIVE_HALF: define noundef <4 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> <half  0xH3C00, half  0xH3C00, half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: ret <4 x half> %hlsl.rcp
+// DXIL_NO_HALF: define noundef <4 x float> @
+// SPIR_NO_HALF: define spir_func noundef <4 x float> @
+// NO_HALF: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: ret <4 x float> %hlsl.rcp
 half4 test_rcp_half4(half4 p0) { return rcp(p0); }
 
-// CHECK: define noundef float @
-// CHECK: %dx.rcp = call float @llvm.dx.rcp.f32(
-// CHECK: ret float %dx.rcp
+// DXIL_CHECK: define noundef float @
+// SPIR_CHECK: define spir_func noundef float @
+// CHECK: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
+// CHECK: ret float %hlsl.rcp
 float test_rcp_float(float p0) { return rcp(p0); }
-// CHECK: define noundef <2 x float> @
-// CHECK: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32
-// CHECK: ret <2 x float> %dx.rcp
+
+// DXIL_CHECK: define noundef <2 x float> @
+// SPIR_CHECK: define spir_func noundef <2 x float> @
+// CHECK: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: ret <2 x float> %hlsl.rcp
 float2 test_rcp_float2(float2 p0) { return rcp(p0); }
-// CHECK: define noundef <3 x float> @
-// CHECK: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32
-// CHECK: ret <3 x float> %dx.rcp
+
+// DXIL_CHECK: define noundef <3 x float> @
+// SPIR_CHECK: define spir_func noundef <3 x float> @
+// CHECK: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: ret <3 x float> %hlsl.rcp
 float3 test_rcp_float3(float3 p0) { return rcp(p0); }
-// CHECK: define noundef <4 x float> @
-// CHECK: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32
-// CHECK: ret <4 x float> %dx.rcp
+
+// DXIL_CHECK: define noundef <4 x float> @
+// SPIR_CHECK: define spir_func noundef <4 x float> @
+// CHECK: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: ret <4 x float> %hlsl.rcp
 float4 test_rcp_float4(float4 p0) { return rcp(p0); }
+
+// DXIL_CHECK: define noundef double @
+// SPIR_CHECK: define spir_func noundef double @
+// CHECK: %hlsl.rcp = fdiv double 1.000000e+00, %{{.*}} 
+// CHECK: ret double %hlsl.rcp
+double test_rcp_double(double p0) { return rcp(p0); }
+
+// DXIL_CHECK: define noundef <2 x double> @
+// SPIR_CHECK: define spir_func noundef <2 x double> @
+// CHECK: %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: ret <2 x double> %hlsl.rcp
+double2 test_rcp_double2(double2 p0) { return rcp(p0); }
+
+// DXIL_CHECK: define noundef <3 x double> @
+// SPIR_CHECK: define spir_func noundef <3 x double> @
+// CHECK: %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: ret <3 x double> %hlsl.rcp
+double3 test_rcp_double3(double3 p0) { return rcp(p0); }
+
+// DXIL_CHECK: define noundef <4 x double> @
+// SPIR_CHECK: define spir_func noundef <4 x double> @
+// CHECK: %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: ret <4 x double> %hlsl.rcp
+double4 test_rcp_double4(double4 p0) { return rcp(p0); }
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -42,7 +42,6 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_clamp:
   case Intrinsic::dx_uclamp:
   case Intrinsic::dx_lerp:
-  case Intrinsic::dx_rcp:
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
     return true;
@@ -218,25 +217,6 @@ static bool expandPowIntrinsic(CallInst *Orig) {
   return true;
 }
 
-static bool expandRcpIntrinsic(CallInst *Orig) {
-  Value *X = Orig->getOperand(0);
-  IRBuilder<> Builder(Orig->getParent());
-  Builder.SetInsertPoint(Orig);
-  Type *Ty = X->getType();
-  Type *EltTy = Ty->getScalarType();
-  Constant *One =
-      Ty->isVectorTy()
-          ? ConstantVector::getSplat(
-                ElementCount::getFixed(
-                    dyn_cast<FixedVectorType>(Ty)->getNumElements()),
-                ConstantFP::get(EltTy, 1.0))
-          : ConstantFP::get(EltTy, 1.0);
-  auto *Result = Builder.CreateFDiv(One, X, "dx.rcp");
-  Orig->replaceAllUsesWith(Result);
-  Orig->eraseFromParent();
-  return true;
-}
-
 static Intrinsic::ID getMaxForClamp(Type *ElemTy,
                                     Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
@@ -300,8 +280,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
     return expandClampIntrinsic(Orig, F.getIntrinsicID());
   case Intrinsic::dx_lerp:
     return expandLerpIntrinsic(Orig);
-  case Intrinsic::dx_rcp:
-    return expandRcpIntrinsic(Orig);
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
     return expandIntegerDot(Orig, F.getIntrinsicID());
diff --git a/llvm/test/CodeGen/DirectX/rcp.ll b/llvm/test/CodeGen/DirectX/rcp.ll
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll
@@ -0,0 +1,130 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+
+; Each function below computes `fdiv 1.0, %p0` (the expansion of HLSL rcp) and
+; must lower to OpFDiv with the matching result type and constant-one operand.
+; FileCheck note: `[[#name:]]` defines a numeric variable, `[[#name]]` uses it.
+
+; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec2_float_16:]] = OpTypeVector %[[#float_16]] 2
+; CHECK-DAG: %[[#vec2_float_32:]] = OpTypeVector %[[#float_32]] 2
+; CHECK-DAG: %[[#vec2_float_64:]] = OpTypeVector %[[#float_64]] 2
+; CHECK-DAG: %[[#vec3_float_16:]] = OpTypeVector %[[#float_16]] 3
+; CHECK-DAG: %[[#vec3_float_32:]] = OpTypeVector %[[#float_32]] 3
+; CHECK-DAG: %[[#vec3_float_64:]] = OpTypeVector %[[#float_64]] 3
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4
+; CHECK-DAG: %[[#const_f64_1:]] = OpConstant %[[#float_64]] 1
+; CHECK-DAG: %[[#const_f32_1:]] = OpConstant %[[#float_32]] 1
+; CHECK-DAG: %[[#const_f16_1:]] = OpConstant %[[#float_16]] 1
+
+; CHECK-DAG: %[[#vec2_const_ones_f16:]] = OpConstantComposite %[[#vec2_float_16]] %[[#const_f16_1]] %[[#const_f16_1]]
+; CHECK-DAG: %[[#vec3_const_ones_f16:]] = OpConstantComposite %[[#vec3_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]
+; CHECK-DAG: %[[#vec4_const_ones_f16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]
+
+; CHECK-DAG: %[[#vec2_const_ones_f32:]] = OpConstantComposite %[[#vec2_float_32]] %[[#const_f32_1]] %[[#const_f32_1]]
+; CHECK-DAG: %[[#vec3_const_ones_f32:]] = OpConstantComposite %[[#vec3_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]
+; CHECK-DAG: %[[#vec4_const_ones_f32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]
+
+; CHECK-DAG: %[[#vec2_const_ones_f64:]] = OpConstantComposite %[[#vec2_float_64]] %[[#const_f64_1]] %[[#const_f64_1]]
+; CHECK-DAG: %[[#vec3_const_ones_f64:]] = OpConstantComposite %[[#vec3_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]
+; CHECK-DAG: %[[#vec4_const_ones_f64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]
+
+
+define spir_func noundef half @test_rcp_half(half noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]]
+  ; CHECK: OpFDiv %[[#float_16]] %[[#const_f16_1]] %[[#arg0]]
+  %hlsl.rcp = fdiv half 0xH3C00, %p0
+  ret half %hlsl.rcp
+}
+
+define spir_func noundef <2 x half> @test_rcp_half2(<2 x half> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_16]]
+  ; CHECK: OpFDiv %[[#vec2_float_16]] %[[#vec2_const_ones_f16]] %[[#arg0]]
+  %hlsl.rcp = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, %p0
+  ret <2 x half> %hlsl.rcp
+}
+
+define spir_func noundef <3 x half> @test_rcp_half3(<3 x half> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
+  ; CHECK: OpFDiv %[[#vec3_float_16]] %[[#vec3_const_ones_f16]] %[[#arg0]]
+  %hlsl.rcp = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
+  ret <3 x half> %hlsl.rcp
+}
+
+define spir_func noundef <4 x half> @test_rcp_half4(<4 x half> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: OpFDiv %[[#vec4_float_16]] %[[#vec4_const_ones_f16]] %[[#arg0]]
+  %hlsl.rcp = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
+  ret <4 x half> %hlsl.rcp
+}
+
+define spir_func noundef float @test_rcp_float(float noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]]
+  ; CHECK: OpFDiv %[[#float_32]] %[[#const_f32_1]] %[[#arg0]]
+  %hlsl.rcp = fdiv float 1.000000e+00, %p0
+  ret float %hlsl.rcp
+}
+
+define spir_func noundef <2 x float> @test_rcp_float2(<2 x float> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_32]]
+  ; CHECK: OpFDiv %[[#vec2_float_32]] %[[#vec2_const_ones_f32]] %[[#arg0]]
+  %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %p0
+  ret <2 x float> %hlsl.rcp
+}
+
+define spir_func noundef <3 x float> @test_rcp_float3(<3 x float> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
+  ; CHECK: OpFDiv %[[#vec3_float_32]] %[[#vec3_const_ones_f32]] %[[#arg0]]
+  %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
+  ret <3 x float> %hlsl.rcp
+}
+
+define spir_func noundef <4 x float> @test_rcp_float4(<4 x float> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: OpFDiv %[[#vec4_float_32]] %[[#vec4_const_ones_f32]] %[[#arg0]]
+  %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
+  ret <4 x float> %hlsl.rcp
+}
+
+define spir_func noundef double @test_rcp_double(double noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_64]]
+  ; CHECK: OpFDiv %[[#float_64]] %[[#const_f64_1]] %[[#arg0]]
+  %hlsl.rcp = fdiv double 1.000000e+00, %p0
+  ret double %hlsl.rcp
+}
+
+define spir_func noundef <2 x double> @test_rcp_double2(<2 x double> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_64]]
+  ; CHECK: OpFDiv %[[#vec2_float_64]] %[[#vec2_const_ones_f64]] %[[#arg0]]
+  %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %p0
+  ret <2 x double> %hlsl.rcp
+}
+
+define spir_func noundef <3 x double> @test_rcp_double3(<3 x double> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_64]]
+  ; CHECK: OpFDiv %[[#vec3_float_64]] %[[#vec3_const_ones_f64]] %[[#arg0]]
+  %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
+  ret <3 x double> %hlsl.rcp
+}
+
+define spir_func noundef <4 x double> @test_rcp_double4(<4 x double> noundef %p0) #0 {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_64]]
+  ; CHECK: OpFDiv %[[#vec4_float_64]] %[[#vec4_const_ones_f64]] %[[#arg0]]
+  %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
+  ret <4 x double> %hlsl.rcp
+}