intel
diff --git a/‎IGC/Compiler/CustomSafeOptPass.cpp
Lines changed: 34 additions & 19 deletions b/‎IGC/Compiler/CustomSafeOptPass.cpp
Lines changed: 34 additions & 19 deletions
diff --git a/‎IGC/Compiler/tests/SplitIndirectEEtoSel/basic.ll
Lines changed: 64 additions & 87 deletions b/‎IGC/Compiler/tests/SplitIndirectEEtoSel/basic.ll
Lines changed: 64 additions & 87 deletions
@@ -5817,15 +5817,6 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
         return;
     }
 
-    // ignore if index instruction is OverflowingBinaryOperator and doesn't have nsw or nuw
-    if (OverflowingBinaryOperator* indexOp = dyn_cast<OverflowingBinaryOperator>(index))
-    {
-        if (!indexOp->hasNoSignedWrap() && !indexOp->hasNoUnsignedWrap())
-        {
-            return;
-        }
-    }
-
     // used to calculate offsets
     int64_t add = 0;
     int64_t mul = 1;
@@ -5836,32 +5827,56 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
        %271 = extractelement <12 x float> %234, i32 %270
     */
     Value* Val1 = nullptr;
+    Value* Val2 = nullptr;
     ConstantInt* ci_add = nullptr;
     ConstantInt* ci_mul = nullptr;
 
-    auto pat1 = m_Add(m_Mul(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
-    auto pat2 = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
+    auto pat_add = m_Add(m_Value(Val2), m_ConstantInt(ci_add));
+    auto pat_mul = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
     // Some code shows `shl+or` instead of mul+add.
-    auto pat21 = m_Or(m_Shl(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
-    auto pat22 = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
+    auto pat_or = m_Or(m_Value(Val2), m_ConstantInt(ci_add));
+    auto pat_shl = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
 
-    if (match(index, pat1) || match(index, pat2))
+    if (match(index, pat_mul) || (match(index, pat_add) && match(Val2, pat_mul)))
     {
-        add = ci_add ? ci_add->getSExtValue() : 0;
         mul = ci_mul ? ci_mul->getSExtValue() : 1;
-        index = Val1;
     }
-    else if (match(index, pat21) || match(index, pat22))
+    else if (match(index, pat_shl) || (match(index, pat_or) && match(Val2, pat_shl)))
     {
-        add = ci_add ? ci_add->getSExtValue() : 0;
         mul = ci_mul ? (1LL << ci_mul->getSExtValue()) : 1LL;
-        index = Val1;
+    }
+    // Instruction::hasPoisonGeneratingFlags() could be used instead
+    // after llvm9 support is dropped
+    auto hasNoOverflow = [](Value* value) {
+        if (OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(value))
+           return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+        return true;
+    };
+
+    // If a pattern matched, check that the corresponding index calculation has nsw or nuw
+    if (Val1)
+    {
+       // The transformation could still be profitable,
+       // but the index and its multiplier must not be modified
+       if (!hasNoOverflow(index) || (Val2 && !hasNoOverflow(Val2)))
+       {
+          mul = 1;
+       }
+       else
+       {
+          add = ci_add ? ci_add->getSExtValue() : 0;
+          index = Val1;
+       }
     }
 
     if (!isProfitableToSplit(num, mul, add))
         return;
 
+#if LLVM_VERSION_MAJOR < 14
     Value* vTemp = llvm::UndefValue::get(eleType);
+#else
+    Value* vTemp = llvm::PoisonValue::get(eleType);
+#endif
     IRBuilder<> builder(I.getNextNode());
 
     // returns true if we can skip this icmp, such as:
 
@@ -1,110 +1,87 @@
 ;=========================== begin_copyright_notice ============================
 ;
-; Copyright (C) 2022 Intel Corporation
+; Copyright (C) 2024 Intel Corporation
 ;
 ; SPDX-License-Identifier: MIT
 ;
 ;============================ end_copyright_notice =============================
 ;
-; RUN: igc_opt -enable-debugify -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
+; RUN: igc_opt -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
 ; ------------------------------------------------
 ; SplitIndirectEEtoSel
 ; ------------------------------------------------
-; Debug-info related check
 ;
-; CHECK-NOT: WARNING
-; CHECK: CheckModuleDebugify: PASS
-define void @test_nuw(i32 %src1, <12 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test_nuw(
-; CHECK:    [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
-; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
-; CHECK:    [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
-; CHECK:    [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
-; CHECK:    [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
-; CHECK:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
-; CHECK:    [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
-; CHECK:    [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
-; CHECK:    [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
-; CHECK:    [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
-; CHECK:    [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
-; CHECK:    [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
-; CHECK:    store float [[TMP12]], float* [[DST:%.*]], align 4
-; CHECK:    [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
-; CHECK:    [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
-; CHECK:    store float [[TMP14]], float* [[DST]], align 4
-; CHECK:    [[TMP15:%.*]] = add i32 [[TMP18]], 2
-; CHECK:    [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
-; CHECK:    store float [[TMP17]], float* [[DST]], align 4
-; CHECK:    ret void
+; This test checks the SplitIndirectEEtoSel pass when the extractelement
+; index is a plain argument or an instruction result,
+; i.e. when no special index patterns are matched.
+; ------------------------------------------------
+
+; ------------------------------------------------
+; Case1: index is argument, profitable
+; ------------------------------------------------
+
+define void @test_arg(i32 %src1, <4 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test_arg(
+; CHECK:    [[TMP1:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
+; CHECK:    [[TMP2:%.*]] = icmp eq i32 [[SRC1]], 0
+; CHECK:    [[TMP3:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
+; CHECK-LLVM-14-PLUS:    [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float poison
+; CHECK-PRE-LLVM-14:    [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float undef
+; CHECK:    [[TMP5:%.*]] = icmp eq i32 [[SRC1]], 1
+; CHECK:    [[TMP6:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
+; CHECK:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP6]], float [[TMP4]]
+; CHECK:    [[TMP8:%.*]] = icmp eq i32 [[SRC1]], 2
+; CHECK:    [[TMP9:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
+; CHECK:    [[TMP10:%.*]] = select i1 [[TMP8]], float [[TMP9]], float [[TMP7]]
+; CHECK:    [[TMP11:%.*]] = icmp eq i32 [[SRC1]], 3
+; CHECK:    [[TMP12:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
+; CHECK:    [[TMP13:%.*]] = select i1 [[TMP11]], float [[TMP12]], float [[TMP10]]
+; CHECK:    store float [[TMP13]], float* [[DST:%.*]], align 4
 ;
-  %1 = mul nuw i32 %src1, 3
-  %2 = extractelement <12 x float> %src2, i32 %1
-  store float %2, float* %dst, align 4
-  %3 = add i32 %1, 1
-  %4 = extractelement <12 x float> %src2, i32 %3
-  store float %4, float* %dst, align 4
-  %5 = add i32 %1, 2
-  %6 = extractelement <12 x float> %src2, i32 %5
-  store float %6, float* %dst, align 4
+  %1 = extractelement <4 x float> %src2, i32 %src1
+  store float %1, float* %dst, align 4
   ret void
 }
 
-define void @test_nsw(i32 %src1, <12 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test_nsw(
-; CHECK:    [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
-; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
-; CHECK:    [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
-; CHECK:    [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
-; CHECK:    [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
-; CHECK:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
-; CHECK:    [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
-; CHECK:    [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
-; CHECK:    [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
-; CHECK:    [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
-; CHECK:    [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
-; CHECK:    [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
-; CHECK:    store float [[TMP12]], float* [[DST:%.*]], align 4
-; CHECK:    [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
-; CHECK:    [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
-; CHECK:    store float [[TMP14]], float* [[DST]], align 4
-; CHECK:    [[TMP15:%.*]] = add i32 [[TMP18]], 2
-; CHECK:    [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
-; CHECK:    store float [[TMP17]], float* [[DST]], align 4
-; CHECK:    ret void
+; ------------------------------------------------
+; Case2: index is instruction, profitable
+; ------------------------------------------------
+
+define void @test_instr(i32 %src1, <4 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test_instr(
+; CHECK:    [[TMP1:%.*]] = add i32 [[SRC1:%.*]], 13
+; CHECK:    [[TMP2:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[TMP1]]
+; CHECK:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK:    [[TMP4:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
+; CHECK-LLVM-14-PLUS:    [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float poison
+; CHECK-PRE-LLVM-14:    [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float undef
+; CHECK:    [[TMP6:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK:    [[TMP7:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
+; CHECK:    [[TMP8:%.*]] = select i1 [[TMP6]], float [[TMP7]], float [[TMP5]]
+; CHECK:    [[TMP9:%.*]] = icmp eq i32 [[TMP1]], 2
+; CHECK:    [[TMP10:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
+; CHECK:    [[TMP11:%.*]] = select i1 [[TMP9]], float [[TMP10]], float [[TMP8]]
+; CHECK:    [[TMP12:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK:    [[TMP13:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
+; CHECK:    [[TMP14:%.*]] = select i1 [[TMP12]], float [[TMP13]], float [[TMP11]]
+; CHECK:    store float [[TMP14]], float* [[DST:%.*]], align 4
 ;
-  %1 = mul nsw i32 %src1, 3
-  %2 = extractelement <12 x float> %src2, i32 %1
+  %1 = add i32 %src1, 13
+  %2 = extractelement <4 x float> %src2, i32 %1
   store float %2, float* %dst, align 4
-  %3 = add i32 %1, 1
-  %4 = extractelement <12 x float> %src2, i32 %3
-  store float %4, float* %dst, align 4
-  %5 = add i32 %1, 2
-  %6 = extractelement <12 x float> %src2, i32 %5
-  store float %6, float* %dst, align 4
   ret void
 }
 
-define void @test(i32 %src1, <12 x float> %src2, float* %dst) {
-; CHECK-LABEL: @test(
-; CHECK:    [[TMP1:%.*]] = mul i32 [[SRC1:%.*]], 3
-; CHECK:    [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[TMP1]]
-; CHECK:    store float [[TMP2]], float* [[DST:%.*]], align 4
-; CHECK:    [[TMP3:%.*]] = add i32 [[TMP1]], 1
-; CHECK:    [[TMP4:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP3]]
-; CHECK:    store float [[TMP4]], float* [[DST]], align 4
-; CHECK:    [[TMP5:%.*]] = add i32 [[TMP1]], 2
-; CHECK:    [[TMP6:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP5]]
-; CHECK:    store float [[TMP6]], float* [[DST]], align 4
-; CHECK:    ret void
+; ------------------------------------------------
+; Case3: index is argument but transformation not profitable
+; ------------------------------------------------
+
+define void @test_arg_not_profit(i32 %src1, <12 x float> %src2, float* %dst) {
+; CHECK-LABEL: @test_arg_not_profit(
+; CHECK:    [[TMP1:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
+; CHECK:    store float [[TMP1]], float* [[DST:%.*]], align 4
 ;
-  %1 = mul i32 %src1, 3
-  %2 = extractelement <12 x float> %src2, i32 %1
-  store float %2, float* %dst, align 4
-  %3 = add i32 %1, 1
-  %4 = extractelement <12 x float> %src2, i32 %3
-  store float %4, float* %dst, align 4
-  %5 = add i32 %1, 2
-  %6 = extractelement <12 x float> %src2, i32 %5
-  store float %6, float* %dst, align 4
+  %1 = extractelement <12 x float> %src2, i32 %src1
+  store float %1, float* %dst, align 4
   ret void
 }