Skip to content

Commit 9411387

Browse files
jgu222igcbot
authored and committed
Fixed vector member of layout struct
Because <3 x i32> has an alloc size of 16 bytes, it is not packed when it is a member of a layout struct. To avoid adding such a vector to a layout struct, check whether the vector's store size equals its alloc size; if it does not, the vector cannot be a member of a layout struct. The problem is shown by this case: the size of struct <{ <3 x float>, i32 }> is 5 x 4 = 20 bytes (16 bytes for the padded vector plus 4 bytes for the i32), not 16 bytes, but a layout struct requires its size to be 16 bytes, as it must be packed.
1 parent 0f83760 commit 9411387

File tree

2 files changed

+66
-3
lines changed

2 files changed

+66
-3
lines changed

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3513,16 +3513,27 @@ Value* LdStCombine::gatherCopy(
35133513
bool isLvl1 = (remainingBytes == DstEltBytes && eBytes == DstEltBytes);
35143514
// true if v is a legal vector at level 2
35153515
bool isLvl2 = (remainingBytes >= (eBytes * n));
3516-
bool isSimpleVec = isSimpleVector(v);
3517-
if (isLvl1 && !isSimpleVec)
3516+
bool keepVector = true;
3517+
if (isLvl1 || isLvl2) {
3518+
if (isSimpleVector(v)) {
3519+
keepVector = false;
3520+
}
3521+
else if ((eBytes * n) != m_DL->getTypeAllocSize(iVTy)) {
3522+
// If vector size isn't packed (store size != alloc size),
3523+
// cannot keep vector.
3524+
// For example, alloc size(<3 x i32>) = 16B, not 12B
3525+
keepVector = false;
3526+
}
3527+
}
3528+
if (isLvl1 && keepVector)
35183529
{ // case 1
35193530
// 1st level vector member
35203531
eltVals.push_back(v);
35213532
allEltVals.push_back(eltVals);
35223533

35233534
eltVals.clear();
35243535
}
3525-
else if (isLvl2 && !isSimpleVec)
3536+
else if (isLvl2 && keepVector)
35263537
{ // case 2
35273538
// 2nd level vector member
35283539
eltVals.push_back(v);
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2017-2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
10+
11+
12+
; Given store <3xi32>
13+
; store float
14+
; combined into <{i32, i32, i32, float}>
15+
; insertvalue
16+
; insertvalue
17+
; insertvalue
18+
; insertvalue
19+
; not into <{ <3 x i32>, float }> as this struct cannot be packed.
20+
;
21+
; CHECK-LABEL: target triple
22+
; CHECK: %__StructSOALayout_ = type <{ i32, i32, i32, float }>
23+
; CHECK-LABEL: define spir_kernel void @test_novec3member
24+
; CHECK: [[STMP1:%.*]] = insertvalue %__StructSOALayout_ undef, i32 %{{.*}}, 0
25+
; CHECK: [[STMP2:%.*]] = insertvalue %__StructSOALayout_ [[STMP1]], i32 %{{.*}}, 1
26+
; CHECK: [[STMP3:%.*]] = insertvalue %__StructSOALayout_ [[STMP2]], i32 %{{.*}}, 2
27+
; CHECK: [[STMP4:%.*]] = insertvalue %__StructSOALayout_ [[STMP3]], float %{{.*}}, 3
28+
; CHECK: [[STMP5:%.*]] = call <4 x i32> @llvm.genx.GenISA.bitcastfromstruct.v4i32.__StructSOALayout_(%__StructSOALayout_ [[STMP4]])
29+
; CHECK: store <4 x i32> [[STMP5]]
30+
; CHECK: ret void
31+
;
32+
33+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
34+
target triple = "spir64-unknown-unknown"
35+
36+
; Function Attrs: convergent nounwind
37+
; Test kernel: copies one <3 x i32> from %ss and one float from %sf into %d.
; The vector is stored at i32 index localIdX of %d and the float at i32 index
; localIdX + 3, so the two stores are adjacent: 12 bytes followed by 4 bytes.
; %localIdY and %localIdZ are not used in the body.
define spir_kernel void @test_novec3member(i32 addrspace(1)* %d, <3 x i32> addrspace(1)* %ss, float addrspace(1)* %sf, i16 %localIdX, i16 %localIdY, i16 %localIdZ) {
38+
entry:
39+
; Widen the 16-bit local id to a 64-bit index used by every GEP below.
%conv.i.i = zext i16 %localIdX to i64
40+
; Source operands: ss[localIdX] (a <3 x i32>) and sf[localIdX] (a float).
%arrayidx = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %ss, i64 %conv.i.i
41+
%loadVec3 = load <3 x i32>, <3 x i32> addrspace(1)* %arrayidx, align 4
42+
%arrayidx2 = getelementptr inbounds float, float addrspace(1)* %sf, i64 %conv.i.i
43+
%vf = load float, float addrspace(1)* %arrayidx2, align 4
44+
; First store: the <3 x i32> is written starting at &d[localIdX].
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %d, i64 %conv.i.i
45+
%daddr1 = bitcast i32 addrspace(1)* %arrayidx3 to <3 x i32> addrspace(1)*
46+
store <3 x i32> %loadVec3, <3 x i32> addrspace(1)* %daddr1, align 4
47+
; Second store: the float is written at &d[localIdX + 3], directly after the
; three i32 lanes written by the vector store above.
%add = add nuw nsw i64 %conv.i.i, 3
48+
%arrayidx10 = getelementptr inbounds i32, i32 addrspace(1)* %d, i64 %add
49+
%daddr2 = bitcast i32 addrspace(1)* %arrayidx10 to float addrspace(1)*
50+
store float %vf, float addrspace(1)* %daddr2, align 4
51+
ret void
52+
}

0 commit comments

Comments
 (0)