Fixed vector member of layout struct

jgu222 · igcbot · commit 94113870f7c2 · 2023-07-04T02:28:27.000+02:00
As <3 x i32> has an alloc size of 16 bytes (its store size is only 12 bytes),
it is not packed when it is a member of a layout struct. To avoid adding such
a vector to a layout struct, check whether the vector's store size equals its
alloc size; if it does not, the vector cannot be a member of a layout struct.

The problem is shown in this case:
   struct <{ <3 x float>, i32 }>
Its size is 5 x 4 = 20 bytes (16 bytes for the padded vector plus 4 bytes for
the i32), not 16 bytes. But a layout struct requires its size to be 16 bytes,
as it must be packed.
diff --git a/IGC/Compiler/CISACodeGen/MemOpt.cpp b/IGC/Compiler/CISACodeGen/MemOpt.cpp
@@ -3513,16 +3513,27 @@ Value* LdStCombine::gatherCopy(
             bool isLvl1 = (remainingBytes == DstEltBytes && eBytes == DstEltBytes);
             // true if v is a legal vector at level 2
             bool isLvl2 = (remainingBytes >= (eBytes * n));
-            bool isSimpleVec = isSimpleVector(v);
-            if (isLvl1 && !isSimpleVec)
+            bool keepVector = true;
+            if (isLvl1 || isLvl2) {
+                if (isSimpleVector(v)) {
+                    keepVector = false;
+                }
+                else if ((eBytes * n) != m_DL->getTypeAllocSize(iVTy)) {
+                    // If vector size isn't packed (store size != alloc size),
+                    // cannot keep vector.
+                    // For example, alloc size(<3 x i32>) = 16B, not 12B
+                    keepVector = false;
+                }
+            }
+            if (isLvl1 && keepVector)
             {   // case 1
                 // 1st level vector member
                 eltVals.push_back(v);
                 allEltVals.push_back(eltVals);
 
                 eltVals.clear();
             }
-            else if (isLvl2 && !isSimpleVec)
+            else if (isLvl2 && keepVector)
             {   // case 2
                 // 2nd level vector member
                 eltVals.push_back(v);
diff --git a/IGC/Compiler/tests/LdStCombine/store_no_vec3_member.ll b/IGC/Compiler/tests/LdStCombine/store_no_vec3_member.ll
@@ -0,0 +1,52 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2017-2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+
+
+
+ ; Given  store <3xi32>
+ ;        store float
+ ;   combined into  <{i32, i32, i32, float}>
+ ;        insertvalue
+ ;        insertvalue
+ ;        insertvalue
+ ;        insertvalue
+ ;   not into <{ <3 x i32>, float }> as this struct cannot be packed.
+ ;
+ ; CHECK-LABEL: target triple
+ ; CHECK: %__StructSOALayout_ = type <{ i32, i32, i32, float }>
+ ; CHECK-LABEL: define spir_kernel void @test_novec3member
+ ; CHECK: [[STMP1:%.*]] = insertvalue %__StructSOALayout_ undef, i32 %{{.*}}, 0
+ ; CHECK: [[STMP2:%.*]] = insertvalue %__StructSOALayout_ [[STMP1]], i32 %{{.*}}, 1
+ ; CHECK: [[STMP3:%.*]] = insertvalue %__StructSOALayout_ [[STMP2]], i32 %{{.*}}, 2
+ ; CHECK: [[STMP4:%.*]] = insertvalue %__StructSOALayout_ [[STMP3]], float %{{.*}}, 3
+ ; CHECK: [[STMP5:%.*]] = call <4 x i32> @llvm.genx.GenISA.bitcastfromstruct.v4i32.__StructSOALayout_(%__StructSOALayout_ [[STMP4]])
+ ; CHECK: store <4 x i32> [[STMP5]]
+ ; CHECK: ret void
+ ;
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
+target triple = "spir64-unknown-unknown"
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @test_novec3member(i32 addrspace(1)* %d, <3 x i32> addrspace(1)* %ss, float addrspace(1)* %sf, i16 %localIdX, i16 %localIdY, i16 %localIdZ) {
+entry:
+  %conv.i.i = zext i16 %localIdX to i64
+  %arrayidx = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %ss, i64 %conv.i.i
+  %loadVec3 = load <3 x i32>, <3 x i32> addrspace(1)* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float addrspace(1)* %sf, i64 %conv.i.i
+  %vf = load float, float addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %d, i64 %conv.i.i
+  %daddr1 = bitcast i32 addrspace(1)* %arrayidx3 to <3 x i32> addrspace(1)*
+  store <3 x i32> %loadVec3, <3 x i32> addrspace(1)* %daddr1, align 4
+  %add = add nuw nsw i64 %conv.i.i, 3
+  %arrayidx10 = getelementptr inbounds i32, i32 addrspace(1)* %d, i64 %add
+  %daddr2 = bitcast i32 addrspace(1)* %arrayidx10 to float addrspace(1)*
+  store float %vf, float addrspace(1)* %daddr2, align 4
+  ret void
+}