symexpr handles or and shl

jgu222 · igcbot · commit 804ed3c0f537 · 2023-10-17T15:47:32.000+02:00
Improve symexpr to handle some special instructions:
    1. or with a constant : change to add if it can be, and
    2. shl by a constant  : change to mul
diff --git a/IGC/Compiler/CISACodeGen/SLMConstProp.cpp b/IGC/Compiler/CISACodeGen/SLMConstProp.cpp
@@ -372,10 +372,36 @@ void SymbolicEvaluation::getSymExprOrConstant(const Value* V, SymExpr*& S, int64
 
     if (const ConstantInt * CI = dyn_cast<const ConstantInt>(V))
     {
+        // symExpr handles symbols with the same bit size, thus sext/zext/trunc
+        // are not handled. With this, a result from either signed or unsigned
+        // integer operations will end up with the same bit pattern. Here, we
+        // choose to use sext on constants.
         C = CI->getSExtValue();
         return;
     }
 
+    // Used for normalizing shift amount.
+    //   For example, for i64 the type mask is 0x3F (63 = 64 - 1).
+    auto getTypeMask = [](const Type* Ty) -> uint32_t {
+        // For simplicity, only types whose size is a power of 2 are handled;
+        // anything else yields 0.
+        const uint32_t bitWidth = Ty->getScalarSizeInBits();
+        const bool isPow2Width = (bitWidth > 0 && isPowerOf2_32(bitWidth));
+        return isPow2Width ? (bitWidth - 1) : 0;
+    };
+
+    // Return value:
+    //   Shift amount: if it is a ConstantInt, valid, and greater than 0
+    //   0 : invalid (this includes a non-constant shift amount)
+    auto getShlAmt = [&getTypeMask](const Instruction* ShlInst) -> uint32_t {
+        IGC_ASSERT(ShlInst->getOpcode() == Instruction::Shl);
+        const uint32_t shtAmtMask = getTypeMask(ShlInst->getType());
+        const ConstantInt* cI = dyn_cast<ConstantInt>(ShlInst->getOperand(1));
+        if (cI && shtAmtMask > 0)
+            return (uint32_t)(cI->getZExtValue() & shtAmtMask);
+        return 0;
+    };
+
     // Instructions/Operators handled for now:
     //   GEP
     //   bitcast (inttoptr, ptrtoint, etc)
@@ -515,6 +541,35 @@ void SymbolicEvaluation::getSymExprOrConstant(const Value* V, SymExpr*& S, int64
             }
             break;
         }
+        case Instruction::Or:
+        {
+            // An 'or' with a constant is treated as an add when the constant
+            // is smaller than (1 << shift amount) of a shl operand, e.g.
+            //   %mul = shl nuw nsw i64 %v, 1
+            //   %add = or i64 %mul, 1
+            //     -->  %add = add %mul, 1
+            const Value* V0 = Op->getOperand(0);
+            const Value* V1 = Op->getOperand(1);
+            getSymExprOrConstant(V0, S0, C0);
+            getSymExprOrConstant(V1, S1, C1);
+            if (!S0 && !S1) {
+                C = C0 | C1;
+                return;
+            }
+
+            // Case: 'or V0, const' or 'or const, V1' (exactly one symbolic operand)
+            if ((S0 && !S1) || (!S0 && S1)) {
+                const Value* tV = (S0 ? V0 : V1);
+                const uint64_t tC = (uint64_t)(S0 ? C1 : C0);
+                const Instruction* tI = dyn_cast<Instruction>(tV);
+                if (tI && tI->getOpcode() == Instruction::Shl) {
+                    uint32_t shtAmt = getShlAmt(tI);
+                    if (shtAmt > 0 && (1ull << shtAmt) > tC)
+                        S = add(S0 ? S0 : S1, tC);
+                }
+            }
+            break;
+        }
         case Instruction::Mul:
         {
             const Value* V0 = Op->getOperand(0);
@@ -538,7 +593,34 @@ void SymbolicEvaluation::getSymExprOrConstant(const Value* V, SymExpr*& S, int64
 
             break;
         }
+        case Instruction::Shl:
+        {
+            // shl by a constant is a mul:  shl a, b, 2  ->  mul a, b, (1 << 2)
+            // Constants are folded with an unsigned shift, as left-shifting a
+            // negative int64_t is undefined behavior before C++20.
+            const Value* V0 = Op->getOperand(0);
+            const Value* V1 = Op->getOperand(1);
+            getSymExprOrConstant(V0, S0, C0);
+            getSymExprOrConstant(V1, S1, C1);
+
+            uint32_t shtAmtMask = getTypeMask(V->getType());
+            if (shtAmtMask == 0) // sanity
+                break;
 
+            if (!S1) {
+                C1 = (C1 & shtAmtMask);
+            }
+
+            if (!S0 && !S1) {
+                C = static_cast<int64_t>(static_cast<uint64_t>(C0) << C1);
+                return;
+            }
+            if (!S1) {
+                uint64_t tC = (1ull << C1);
+                S = mul(S0, tC);
+            }
+            break;
+        }
         case Instruction::BitCast:
         case Instruction::IntToPtr:
         case Instruction::PtrToInt:
diff --git a/IGC/Compiler/tests/LdStCombine/store_address_or_inst.ll b/IGC/Compiler/tests/LdStCombine/store_address_or_inst.ll
@@ -0,0 +1,58 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2017-2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+
+
+
+ ; Given  a0 = shl a, 1
+ ;        a1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %d, i64 %a0
+ ;        store <2 x i32> %v0, <2 x i32> addrspace(1)* %a1, align 8
+ ;        a2 = or i64 %a0, 1
+ ;        a3 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %d, i64 %a2
+ ;        store <2 x i32> %v1, <2 x i32> addrspace(1)* %a3, align 8
+ ;   combined into
+ ;        store <4xi32>
+ ;
+ ; This is to test that symbolic expression can handle 'or' and 'shl' instructions
+ ;
+ ; CHECK-LABEL: target datalayout
+ ; CHECK: %__StructSOALayout_ = type <{ <2 x i32>, <2 x i32> }>
+ ; CHECK-LABEL: define spir_kernel void @test_st
+ ; CHECK: load <4 x i32>,
+ ; CHECK: [[TMP1:%.*]] = insertvalue %__StructSOALayout_ undef, <2 x i32> %{{.*}}, 0
+ ; CHECK: [[TMP2:%.*]] = insertvalue %__StructSOALayout_ [[TMP1]], <2 x i32> %{{.*}}, 1
+ ; CHECK: [[TMP3:%.*]] = call <4 x i32> @llvm.genx.GenISA.bitcastfromstruct.v4i32.__StructSOALayout_(%__StructSOALayout_ [[TMP2]])
+ ; CHECK: store <4 x i32> [[TMP3]]
+ ; CHECK: ret void
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
+target triple = "spir64-unknown-unknown"
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @test_st(<2 x i32> addrspace(1)* %d, <4 x i32> addrspace(1)* %s, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %enqueuedLocalSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ) #0 {
+entry:
+  %payloadHeader.scalar = extractelement <8 x i32> %payloadHeader, i32 0
+  %enqueuedLocalSize.scalar = extractelement <3 x i32> %enqueuedLocalSize, i32 0
+  %r0.scalar17 = extractelement <8 x i32> %r0, i32 1
+  %mul.i.i.i = mul i32 %enqueuedLocalSize.scalar, %r0.scalar17
+  %localIdX2 = zext i16 %localIdX to i32
+  %add.i.i.i = add i32 %mul.i.i.i, %localIdX2
+  %add4.i.i.i = add i32 %add.i.i.i, %payloadHeader.scalar
+  %conv.i.i.i = zext i32 %add4.i.i.i to i64
+  %mul = shl nuw nsw i64 %conv.i.i.i, 1
+  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %s, i64 %mul
+  %0 = load <4 x i32>, <4 x i32> addrspace(1)* %arrayidx, align 16
+  %vecinit1.assembled.vect36 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %vecinit4.assembled.vect37 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %arrayidx5 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %d, i64 %mul
+  store <2 x i32> %vecinit1.assembled.vect36, <2 x i32> addrspace(1)* %arrayidx5, align 8
+  %add = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %d, i64 %add
+  store <2 x i32> %vecinit4.assembled.vect37, <2 x i32> addrspace(1)* %arrayidx6, align 8
+  ret void
+}