intel
diff --git a/IGC/Compiler/CustomSafeOptPass.cpp b/IGC/Compiler/CustomSafeOptPass.cpp
Lines changed: 19 additions & 34 deletions
diff --git a/IGC/Compiler/tests/SplitIndirectEEtoSel/basic.ll b/IGC/Compiler/tests/SplitIndirectEEtoSel/basic.ll
Lines changed: 87 additions & 64 deletions
@@ -5828,6 +5828,15 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
         return;
     }
 
+    // ignore if index instruction is OverflowingBinaryOperator and doesn't have nsw or nuw
+    if (OverflowingBinaryOperator* indexOp = dyn_cast<OverflowingBinaryOperator>(index))
+    {
+        if (!indexOp->hasNoSignedWrap() && !indexOp->hasNoUnsignedWrap())
+        {
+            return;
+        }
+    }
+
     // used to calculate offsets
     int64_t add = 0;
     int64_t mul = 1;
@@ -5838,56 +5847,32 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
        %271 = extractelement <12 x float> %234, i32 %270
     */
     Value* Val1 = nullptr;
-    Value* Val2 = nullptr;
     ConstantInt* ci_add = nullptr;
     ConstantInt* ci_mul = nullptr;
 
-    auto pat_add = m_Add(m_Value(Val2), m_ConstantInt(ci_add));
-    auto pat_mul = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
+    auto pat1 = m_Add(m_Mul(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
+    auto pat2 = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
     // Some code shows `shl+or` instead of mul+add.
-    auto pat_or = m_Or(m_Value(Val2), m_ConstantInt(ci_add));
-    auto pat_shl = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
+    auto pat21 = m_Or(m_Shl(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
+    auto pat22 = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
 
-    if (match(index, pat_mul) || (match(index, pat_add) && match(Val2, pat_mul)))
+    if (match(index, pat1) || match(index, pat2))
     {
+        add = ci_add ? ci_add->getSExtValue() : 0;
         mul = ci_mul ? ci_mul->getSExtValue() : 1;
+        index = Val1;
     }
-    else if (match(index, pat_shl) || (match(index, pat_or) && match(Val2, pat_shl)))
+    else if (match(index, pat21) || match(index, pat22))
     {
+        add = ci_add ? ci_add->getSExtValue() : 0;
         mul = ci_mul ? (1LL << ci_mul->getSExtValue()) : 1LL;
-    }
-    // Instruction::hasPoisonGeneratingFlags() could be used instead
-    // after llvm9 support is dropped
-    auto hasNoOverflow = [](Value* value) {
-        if (OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(value))
-           return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
-        return true;
-    };
-
-    // If pattern matched check that corresponding index calculation has nsw or nuw
-    if (Val1)
-    {
-       // Transformation could still be profitable,
-       // but index and it's multiplier shouldn't be modified
-       if (!hasNoOverflow(index) || (Val2 && !hasNoOverflow(Val2)))
-       {
-          mul = 1;
-       }
-       else
-       {
-          add = ci_add ? ci_add->getSExtValue() : 0;
-          index = Val1;
-       }
+        index = Val1;
     }
 
     if (!isProfitableToSplit(num, mul, add))
         return;
 
-#if LLVM_VERSION_MAJOR < 14
     Value* vTemp = llvm::UndefValue::get(eleType);
-#else
-    Value* vTemp = llvm::PoisonValue::get(eleType);
-#endif
     IRBuilder<> builder(I.getNextNode());
 
     // returns true if we can skip this icmp, such as:
 
@@ -1,87 +1,110 @@
 ;=========================== begin_copyright_notice ============================
 ;
-; Copyright (C) 2024 Intel Corporation
+; Copyright (C) 2022 Intel Corporation
 ;
 ; SPDX-License-Identifier: MIT
 ;
 ;============================ end_copyright_notice =============================
 ;
-; RUN: igc_opt -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
+; RUN: igc_opt -enable-debugify -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
 ; ------------------------------------------------
 ; SplitIndirectEEtoSel
 ; ------------------------------------------------
+; Debug-info related check
 ;
-; This test checks argument and instruction as index
-; for extractelement for SplitIndirectEEtoSel pass
-; no special patterns matched
-; ------------------------------------------------
-
-; ------------------------------------------------
-; Case1: index is argument, profitable
-; ------------------------------------------------
-
-define void @test_arg(i32 %src1, <4 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test_arg(
-; CHECK:    [[TMP1:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
-; CHECK:    [[TMP2:%.*]] = icmp eq i32 [[SRC1]], 0
-; CHECK:    [[TMP3:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
-; CHECK-LLVM-14-PLUS:    [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float poison
-; CHECK-PRE-LLVM-14:    [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float undef
-; CHECK:    [[TMP5:%.*]] = icmp eq i32 [[SRC1]], 1
-; CHECK:    [[TMP6:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
-; CHECK:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP6]], float [[TMP4]]
-; CHECK:    [[TMP8:%.*]] = icmp eq i32 [[SRC1]], 2
-; CHECK:    [[TMP9:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
-; CHECK:    [[TMP10:%.*]] = select i1 [[TMP8]], float [[TMP9]], float [[TMP7]]
-; CHECK:    [[TMP11:%.*]] = icmp eq i32 [[SRC1]], 3
-; CHECK:    [[TMP12:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
-; CHECK:    [[TMP13:%.*]] = select i1 [[TMP11]], float [[TMP12]], float [[TMP10]]
-; CHECK:    store float [[TMP13]], float* [[DST:%.*]], align 4
+; CHECK-NOT: WARNING
+; CHECK: CheckModuleDebugify: PASS
+define void @test_nuw(i32 %src1, <12 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test_nuw(
+; CHECK:    [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
+; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
+; CHECK:    [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
+; CHECK:    [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
+; CHECK:    [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
+; CHECK:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
+; CHECK:    [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
+; CHECK:    [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
+; CHECK:    [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
+; CHECK:    [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
+; CHECK:    [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
+; CHECK:    [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
+; CHECK:    store float [[TMP12]], float* [[DST:%.*]], align 4
+; CHECK:    [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
+; CHECK:    [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
+; CHECK:    store float [[TMP14]], float* [[DST]], align 4
+; CHECK:    [[TMP15:%.*]] = add i32 [[TMP18]], 2
+; CHECK:    [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
+; CHECK:    store float [[TMP17]], float* [[DST]], align 4
+; CHECK:    ret void
 ;
-  %1 = extractelement <4 x float> %src2, i32 %src1
-  store float %1, float* %dst, align 4
+  %1 = mul nuw i32 %src1, 3
+  %2 = extractelement <12 x float> %src2, i32 %1
+  store float %2, float* %dst, align 4
+  %3 = add i32 %1, 1
+  %4 = extractelement <12 x float> %src2, i32 %3
+  store float %4, float* %dst, align 4
+  %5 = add i32 %1, 2
+  %6 = extractelement <12 x float> %src2, i32 %5
+  store float %6, float* %dst, align 4
   ret void
 }
 
-; ------------------------------------------------
-; Case2: index is instruction, profitable
-; ------------------------------------------------
-
-define void @test_instr(i32 %src1, <4 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test_instr(
-; CHECK:    [[TMP1:%.*]] = add i32 [[SRC1:%.*]], 13
-; CHECK:    [[TMP2:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[TMP1]]
-; CHECK:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK:    [[TMP4:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
-; CHECK-LLVM-14-PLUS:    [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float poison
-; CHECK-PRE-LLVM-14:    [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float undef
-; CHECK:    [[TMP6:%.*]] = icmp eq i32 [[TMP1]], 1
-; CHECK:    [[TMP7:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
-; CHECK:    [[TMP8:%.*]] = select i1 [[TMP6]], float [[TMP7]], float [[TMP5]]
-; CHECK:    [[TMP9:%.*]] = icmp eq i32 [[TMP1]], 2
-; CHECK:    [[TMP10:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
-; CHECK:    [[TMP11:%.*]] = select i1 [[TMP9]], float [[TMP10]], float [[TMP8]]
-; CHECK:    [[TMP12:%.*]] = icmp eq i32 [[TMP1]], 3
-; CHECK:    [[TMP13:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
-; CHECK:    [[TMP14:%.*]] = select i1 [[TMP12]], float [[TMP13]], float [[TMP11]]
-; CHECK:    store float [[TMP14]], float* [[DST:%.*]], align 4
+define void @test_nsw(i32 %src1, <12 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test_nsw(
+; CHECK:    [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
+; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
+; CHECK:    [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
+; CHECK:    [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
+; CHECK:    [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
+; CHECK:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
+; CHECK:    [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
+; CHECK:    [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
+; CHECK:    [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
+; CHECK:    [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
+; CHECK:    [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
+; CHECK:    [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
+; CHECK:    store float [[TMP12]], float* [[DST:%.*]], align 4
+; CHECK:    [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
+; CHECK:    [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
+; CHECK:    store float [[TMP14]], float* [[DST]], align 4
+; CHECK:    [[TMP15:%.*]] = add i32 [[TMP18]], 2
+; CHECK:    [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
+; CHECK:    store float [[TMP17]], float* [[DST]], align 4
+; CHECK:    ret void
 ;
-  %1 = add i32 %src1, 13
-  %2 = extractelement <4 x float> %src2, i32 %1
+  %1 = mul nsw i32 %src1, 3
+  %2 = extractelement <12 x float> %src2, i32 %1
   store float %2, float* %dst, align 4
+  %3 = add i32 %1, 1
+  %4 = extractelement <12 x float> %src2, i32 %3
+  store float %4, float* %dst, align 4
+  %5 = add i32 %1, 2
+  %6 = extractelement <12 x float> %src2, i32 %5
+  store float %6, float* %dst, align 4
   ret void
 }
 
-; ------------------------------------------------
-; Case3: index is argument but transformation not profitable
-; ------------------------------------------------
-
-define void @test_arg_not_profit(i32 %src1, <12 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test_arg_not_profit(
-; CHECK:    [[TMP1:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
-; CHECK:    store float [[TMP1]], float* [[DST:%.*]], align 4
+define void @test(i32 %src1, <12 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test(
+; CHECK:    [[TMP1:%.*]] = mul i32 [[SRC1:%.*]], 3
+; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[TMP1]]
+; CHECK:    store float [[TMP2]], float* [[DST:%.*]], align 4
+; CHECK:    [[TMP3:%.*]] = add i32 [[TMP1]], 1
+; CHECK:    [[TMP4:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP3]]
+; CHECK:    store float [[TMP4]], float* [[DST]], align 4
+; CHECK:    [[TMP5:%.*]] = add i32 [[TMP1]], 2
+; CHECK:    [[TMP6:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP5]]
+; CHECK:    store float [[TMP6]], float* [[DST]], align 4
+; CHECK:    ret void
 ;
-  %1 = extractelement <12 x float> %src2, i32 %src1
-  store float %1, float* %dst, align 4
+  %1 = mul i32 %src1, 3
+  %2 = extractelement <12 x float> %src2, i32 %1
+  store float %2, float* %dst, align 4
+  %3 = add i32 %1, 1
+  %4 = extractelement <12 x float> %src2, i32 %3
+  store float %4, float* %dst, align 4
+  %5 = add i32 %1, 2
+  %6 = extractelement <12 x float> %src2, i32 %5
+  store float %6, float* %dst, align 4
   ret void
 }