Add support for frexp. Move the struct-of-vectors lookup out of getVectorSplit and into the CallInst and ExtractValueInst visitors

farzonl · farzonl · commit da2e3c16aaea · 2024-10-18T19:07:21.000-04:00
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -91,6 +91,5 @@ def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32
 def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
-
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 }
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -197,9 +197,15 @@ struct VectorLayout {
   uint64_t SplitSize = 0;
 };
 
-static bool isStructOfVectors(Type *Ty) {
-  return isa<StructType>(Ty) && Ty->getNumContainedTypes() > 0 &&
-         isa<FixedVectorType>(Ty->getContainedType(0));
+/// Return true if \p Ty is a non-empty struct whose members are all fixed
+/// vectors. Empty structs are rejected because callers read member 0.
+static bool isStructAllVectors(Type *Ty) {
+  if (!isa<StructType>(Ty) || Ty->getNumContainedTypes() == 0)
+    return false;
+  for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I)
+    if (!isa<FixedVectorType>(Ty->getContainedType(I)))
+      return false;
+  return true;
+}
 
 /// Concatenate the given fragments to a single vector value of the type
@@ -558,10 +564,7 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
 // Determine how Ty is split, if at all.
 std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
   VectorSplit Split;
-  if (isStructOfVectors(Ty))
-    Split.VecTy = cast<FixedVectorType>(Ty->getContainedType(0));
-  else
-    Split.VecTy = dyn_cast<FixedVectorType>(Ty);
+  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
   if (!Split.VecTy)
     return {};
 
@@ -676,14 +679,24 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
 bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
   if (isTriviallyVectorizable(ID))
     return true;
+  // frexp is accepted explicitly: its vector form returns a struct of
+  // vectors, which splitCall scalarizes into one struct-returning call each.
+  if (ID == Intrinsic::frexp)
+    return true;
   return Intrinsic::isTargetIntrinsic(ID) &&
          TTI->isTargetIntrinsicTriviallyScalarizable(ID);
 }
 
 /// If a call to a vector typed intrinsic function, split into a scalar call per
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
-  std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
+  Type* CallType = CI.getType();
+  bool areAllVectors = isStructAllVectors(CallType);
+   std::optional<VectorSplit> VS;
+  if (areAllVectors)
+    VS = getVectorSplit(CallType->getContainedType(0));
+  else
+    VS = getVectorSplit(CallType);
   if (!VS)
     return false;
 
@@ -708,6 +721,18 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
     Tys.push_back(VS->SplitTy);
 
+  if(areAllVectors) {
+    Type* PrevType = CallType->getContainedType(0);
+    Type* CallType = CI.getType();
+    for(unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
+      Type* CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
+      if(PrevType != CurrType) {
+        std::optional<VectorSplit> CurrVS = getVectorSplit(CurrType);
+        Tys.push_back(CurrVS->SplitTy);
+        PrevType = CurrType;
+      }
+    }
+  }
   // Assumes that any vector type has the same number of elements as the return
   // vector type, which is true for all current intrinsics.
   for (unsigned I = 0; I != NumArgs; ++I) {
@@ -1043,15 +1068,13 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   Value *Op = EVI.getOperand(0);
   Type *OpTy = Op->getType();
   ValueVector Res;
-  if (!isStructOfVectors(OpTy))
+  if (!isStructAllVectors(OpTy))
     return false;
-  // Note: isStructOfVectors is also used in getVectorSplit.
-  // The intent is to bail on this visit if it isn't a struct
-  // of vectors. Downside is that when it is true we do two
-  // isStructOfVectors calls.
-  std::optional<VectorSplit> VS = getVectorSplit(OpTy);
+  Type* VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
+  std::optional<VectorSplit> VS = getVectorSplit(VecType);
   if (!VS)
     return false;
+  IRBuilder<> Builder(&EVI);
   Scatterer Op0 = scatter(&EVI, Op, *VS);
   assert(!EVI.getIndices().empty() && "Make sure an index exists");
   // Note for our use case we only care about the top level index.
@@ -1252,7 +1275,7 @@ bool ScalarizerVisitor::finish() {
           Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
 
         // Iterate over each element in the struct
-        uint NumOfStructElements = Ty->getNumElements();
+        unsigned NumOfStructElements = Ty->getNumElements();
         SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
         for (unsigned I = 0; I < NumOfStructElements; ++I) {
           for (auto *CVelem : CV) {
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
@@ -1,10 +1,32 @@
+; RUN: opt -S -scalarizer  -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
-; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+define void @test_vector_double_split_void(<3 x double> noundef %d) {
+  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+  ret void
+}
 
-define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) local_unnamed_addr {
-    %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
-    %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
-    %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
-    %3 = add <3 x i32> %1, %2
-    ret <3 x i32> %3
+define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
+  ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0
+  ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <3 x double> %d, i64 1
+  ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <3 x double> %d, i64 2
+  ; CHECK: [[ie2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee2]])
+  ; CHECK: [[ev00:%.*]] = extractvalue { i32, i32 } [[ie0]], 0
+  ; CHECK: [[ev01:%.*]] = extractvalue { i32, i32 } [[ie1]], 0
+  ; CHECK: [[ev02:%.*]] = extractvalue { i32, i32 } [[ie2]], 0
+  ; CHECK: [[ev10:%.*]] = extractvalue { i32, i32 } [[ie0]], 1
+  ; CHECK: [[ev11:%.*]] = extractvalue { i32, i32 } [[ie1]], 1
+  ; CHECK: [[ev12:%.*]] = extractvalue { i32, i32 } [[ie2]], 1
+  ; CHECK: [[add1:%.*]] = add i32 [[ev00]], [[ev10]]
+  ; CHECK: [[add2:%.*]] = add i32 [[ev01]], [[ev11]]
+  ; CHECK: [[add3:%.*]] = add i32 [[ev02]], [[ev12]]
+  ; CHECK: insertelement <3 x i32> poison, i32 [[add1]], i64 0
+  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add2]], i64 1
+  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add3]], i64 2
+  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+  %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
+  %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
+  %3 = add <3 x i32> %1, %2
+  ret <3 x i32> %3
 }
diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll
@@ -0,0 +1,66 @@
+; RUN: opt %s -passes='function(scalarizer<load-store>)' -S | FileCheck %s
+
+; CHECK-LABEL: @test_vector_half_frexp_half
+define noundef <2 x half> @test_vector_half_frexp_half(<2 x half> noundef %h) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { half, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { half, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x half> poison, half [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x half> %{{.*}}, half [[ev01]], i64 1
+  %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
+  %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0
+  ret <2 x half> %e0
+}
+
+; CHECK-LABEL: @test_vector_half_frexp_int
+define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
+  ; CHECK-NEXT: [[ev10:%.*]] = extractvalue { half, i32 } [[ie0]], 1
+  ; CHECK-NEXT: [[ev11:%.*]] = extractvalue { half, i32 } [[ie1]], 1
+  ; CHECK-NEXT: insertelement <2 x i32> poison, i32 [[ev10]], i64 0
+  ; CHECK-NEXT: insertelement <2 x i32> %{{.*}}, i32 [[ev11]], i64 1
+  %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
+  %e1 = extractvalue { <2 x half>, <2 x i32> } %r, 1
+  ret <2 x i32> %e1
+}
+
+; CHECK-LABEL: @test_vector_float_frexp_int
+define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x float> %f, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { float, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { float, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x float> poison, float [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x float> %{{.*}}, float [[ev01]], i64 1
+  ; CHECK-NEXT: extractvalue { float, i32 } [[ie0]], 1
+  ; CHECK-NEXT: extractvalue { float, i32 } [[ie1]], 1
+  %1 =  call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %f)
+  %2 = extractvalue { <2 x float>, <2 x i32> } %1, 0
+  %3 = extractvalue { <2 x float>, <2 x i32> } %1, 1
+  ret <2 x float> %2
+}
+; CHECK-LABEL: @test_vector_double_frexp_int
+define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { double, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { double, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x double> poison, double [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x double> %{{.*}}, double [[ev01]], i64 1
+  ; CHECK-NEXT: extractvalue { double, i32 } [[ie0]], 1
+  ; CHECK-NEXT: extractvalue { double, i32 } [[ie1]], 1
+  %1 =  call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d)
+  %2 = extractvalue { <2 x double>, <2 x i32> } %1, 0
+  %3 = extractvalue { <2 x double>, <2 x i32> } %1, 1
+  ret <2 x double> %2
+}

Original file line number	Diff line number	Diff line change
`@@ -91,6 +91,5 @@ def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32`
`91`	`91`	`def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;`
`92`	`92`	`def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],`
`93`	`93`	`[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;`
`94`		`-`
`95`	`94`	`def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;`
`96`	`95`	`}`