Skip to content

Commit 9291fc3

Browse files
smilczek authored and igcbot committed
Account for data layout when calculating offsets in handleGEPInst.
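When handleGEPInst calculates pScalarizedIdx, the data layout is not taken into account, which causes element offsets to be miscalculated when LowerGEPForPrivMem promotes stores/loads to registers. This commit addresses that by deriving the number of elements per vector from the DataLayout allocation sizes (vector alloc size divided by element alloc size) instead of the vector's nominal element count. For example, under a data layout with v96:128:128, a <3 x float> occupies 16 bytes, i.e. four float slots, so element 3 of [5 x <3 x float>] starts at scalar index 12 rather than 9.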
1 parent e3d9f75 commit 9291fc3

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,10 @@ std::pair<unsigned int, Type*> TransposeHelper::getArrSizeAndEltType(Type* T)
867867
}
868868
else
869869
{
870-
arr_sz = (unsigned)cast<IGCLLVM::FixedVectorType>(T)->getNumElements();
870+
auto* vTy = cast<IGCLLVM::FixedVectorType>(T);
871+
unsigned int vector_size_in_bytes = int_cast<unsigned int>(m_DL.getTypeAllocSize(T));
872+
unsigned int elt_size_in_bytes = int_cast<unsigned int>(m_DL.getTypeAllocSize(vTy->getElementType()));
873+
arr_sz = vector_size_in_bytes / elt_size_in_bytes;
871874
}
872875
retTy = cast<VectorType>(T)->getElementType();
873876
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt -igc-priv-mem-to-reg -S < %s 2>&1 | FileCheck %s
10+
; ------------------------------------------------
11+
; LowerGEPForPrivMem
12+
; ------------------------------------------------
13+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
14+
15+
define void @test(<4 x i32> %a, <4 x i32>* %b) {
16+
; CHECK-LABEL: @test(
17+
; CHECK: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 12
18+
; CHECK: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 13
19+
; CHECK: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 14
20+
; CHECK: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 15
21+
; CHECK-NOT: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 9
22+
; CHECK-NOT: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 10
23+
; CHECK-NOT: insertelement <20 x float> {{.*}}, i{{[0-9]*}} 11
24+
%data = alloca [5 x <3 x float>]
25+
%gepf3 = getelementptr inbounds [5 x <3 x float>], [5 x <3 x float>]* %data, i64 0, i64 3
26+
%bcf4 = bitcast <3 x float>* %gepf3 to <4 x float>*
27+
%v0 = insertelement <4 x float> undef, float 1.000000e+00, i64 0
28+
%v1 = insertelement <4 x float> %v0, float 2.000000e+00, i64 1
29+
%v2 = insertelement <4 x float> %v1, float 3.000000e+00, i64 2
30+
store <4 x float> %v2, <4 x float>* %bcf4
31+
ret void
32+
}
33+
34+
!igc.functions = !{!0}
35+
36+
!0 = !{void (<4 x i32>, <4 x i32>*)* @test, !1}
37+
!1 = !{!2, !3}
38+
!2 = !{!"function_type", i32 0}
39+
!3 = !{!"implicit_arg_desc"}
40+
!4 = !{i32 8}
41+
!5 = !{i1 true}

0 commit comments

Comments
 (0)