Skip to content

Commit 9024b62

Browse files
jgu222igcbot
authored and committed
[OCL]More SOA transpose optim
Set EnablePrivMemNewSOATranspose=2, which will do SOA for array of dw/qw/simple-struct
1 parent cc4c6f8 commit 9024b62

File tree

3 files changed

+102
-2
lines changed

3 files changed

+102
-2
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys
10+
;
11+
; No SOA transpose
12+
; RUN: igc_opt --ocl --platformpvc --igc-private-mem-resolution --regkey EnablePrivMemNewSOATranspose=2 -S %s | FileCheck --check-prefixes=CHECK,CHECK-K2 %s
13+
;
14+
; SOA transpose on the entire struct
15+
; RUN: igc_opt --ocl --platformpvc --igc-private-mem-resolution --regkey EnablePrivMemNewSOATranspose=3 -S %s | FileCheck --check-prefixes=CHECK,CHECK-K3 %s
16+
;
17+
18+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
19+
target triple = "spir64-unknown-unknown"
20+
21+
%struct.Packed = type <{ i8, i16, i8, float }>
22+
23+
; CHECK-LABEL: @test
24+
;;
25+
;; prolog in entry block. Get buffer's perThreadOffset
26+
;;
27+
; CHECK-K2: [[T00:%.*]] = call i16 @llvm.genx.GenISA.simdLaneId()
28+
; CHECK-K2: [[simdLaneId:%.*]] = zext i16 [[T00]] to i32
29+
; CHECK-K2: [[simdSize:%.*]] = call i32 @llvm.genx.GenISA.simdSize()
30+
; CHECK-K2: [[T01:%.*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32()
31+
; CHECK-K2: [[T02:%.*]] = mul i32 [[simdSize]], 8192
32+
; CHECK-K2: [[perThreadOffset:%.*]] = mul i32 [[T01]], [[T02]]
33+
;
34+
;; No SOA transpose
35+
;;
36+
; CHECK-K2: {{.*}} = mul i32 [[simdLaneId]], 8192
37+
;
38+
;;
39+
;;
40+
; CHECK-K3: [[T00:%.*]] = call i16 @llvm.genx.GenISA.simdLaneId()
41+
; CHECK-K3: [[simdLaneId:%.*]] = zext i16 [[T00]] to i32
42+
; CHECK-K3: [[simdSize:%.*]] = call i32 @llvm.genx.GenISA.simdSize()
43+
; CHECK-K3: [[T01:%.*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32()
44+
; CHECK-K3: [[T02:%.*]] = mul i32 [[simdSize]], 8192
45+
; CHECK-K3: [[perThreadOffset:%.*]] = mul i32 [[T01]], [[T02]]
46+
;
47+
;; SOA transpose for the entire packed struct
48+
;;
49+
; CHECK-K3: {{.*}} = mul i32 [[simdLaneId]], 8
50+
51+
; Function Attrs: nofree nosync nounwind
52+
; Test kernel: allocates a private array of 1024 %struct.Packed elements and
; writes fields 1 (i16) and 3 (float) of one dynamically indexed element.
; %struct.Packed is a packed struct <{ i8, i16, i8, float }>, i.e. 8 bytes with
; no padding, so the whole array is 1024 * 8 = 8192 bytes. That is the size
; passed to the lifetime markers below and the constant the expected per-lane
; offsets are built from.
; %d, %localIdY, %localIdZ and %privateBase are not referenced in the body.
define spir_kernel void @test(i32 addrspace(1)* nocapture writeonly %d, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %enqueuedLocalSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, i8* nocapture readnone %privateBase) {
53+
entry:
54+
; Scalar pieces of the implicit arguments used to form the element index.
%payloadHeader.scalar = extractelement <8 x i32> %payloadHeader, i64 0
55+
%enqueuedLocalSize.scalar = extractelement <3 x i32> %enqueuedLocalSize, i64 0
56+
%r0.scalar18 = extractelement <8 x i32> %r0, i64 1
57+
; The private array under test (8192 bytes, see the note on the kernel).
%pb = alloca [1024 x %struct.Packed ], align 4
58+
; ix = enqueuedLocalSize[0] * r0[1] + zext(localIdX) + payloadHeader[0]
; NOTE(review): presumably the global work-item id in X; confirm against the
; implicit-argument layout. What matters for the test is that the index is not
; a compile-time constant.
%tmp0 = mul i32 %enqueuedLocalSize.scalar, %r0.scalar18
59+
%localIdX3 = zext i16 %localIdX to i32
60+
%tmp1 = add i32 %tmp0, %localIdX3
61+
%ix = add i32 %tmp1, %payloadHeader.scalar
62+
%idx = zext i32 %ix to i64
63+
; i8* view of the array, used only by the lifetime markers.
%tmp2 = bitcast [1024 x %struct.Packed ]* %pb to i8*
64+
call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %tmp2)
65+
; Field 1 is the i16 member; in the packed layout it sits at byte offset 1,
; hence the align 1 on the store.
%staddr0 = getelementptr inbounds [1024 x %struct.Packed ], [1024 x %struct.Packed ]* %pb, i64 0, i64 %idx, i32 1
66+
store i16 1, i16* %staddr0, align 1
67+
; Field 3 is the float member at byte offset 4 of the same element.
%staddr1 = getelementptr inbounds [1024 x %struct.Packed ], [1024 x %struct.Packed ]* %pb, i64 0, i64 %idx, i32 3
68+
store float 0.000000e+00, float* %staddr1, align 4
69+
call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %tmp2)
70+
;
71+
; CHECK: ret
72+
ret void
73+
}
74+
75+
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
76+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
77+
78+
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
79+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
80+
81+
!IGCMetadata = !{!0}
82+
!igc.functions = !{!6}
83+
84+
!0 = !{!"ModuleMD", !1, !3}
85+
!1 = !{!"compOpt", !2}
86+
!2 = !{!"UseScratchSpacePrivateMemory", i1 true}
87+
!3 = !{!"FuncMD", !4, !5}
88+
!4 = !{!"FuncMDMap[1]", void (i32 addrspace(1)*, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i8*)* @test}
89+
!5 = !{!"FuncMDValue[1]", !2}
90+
!6 = !{void (i32 addrspace(1)*, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i8*)* @test, !408}
91+
!408 = !{!409, !410}
92+
!409 = !{!"function_type", i32 0}
93+
!410 = !{!"implicit_arg_desc", !411, !412, !413, !414, !415, !416, !417}
94+
!411 = !{i32 0}
95+
!412 = !{i32 1}
96+
!413 = !{i32 6}
97+
!414 = !{i32 7}
98+
!415 = !{i32 8}
99+
!416 = !{i32 9}
100+
!417 = !{i32 12}

IGC/Compiler/tests/PrivateMemoryResolution/alloca_big_array.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; REQUIRES : regkeys
9+
; REQUIRES: regkeys
1010
; RUN: igc_opt --regkey EnablePrivMemNewSOATranspose=0 --igc-private-mem-resolution --platformpvc -S %s 2>&1 | FileCheck %s
1111

1212
define spir_kernel void @testallocabig(i8* %privateBase) {

IGC/common/igc_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,7 @@ DECLARE_IGC_REGKEY(int, OverrideCsWalkOrder, 0, "Override compute wa
823823
DECLARE_IGC_REGKEY(bool, OverrideCsTileLayoutEnable, false, "Enable overriding compute walker tile layout", true)
824824
DECLARE_IGC_REGKEY(bool, OverrideCsTileLayout, 0, "Override compute walker tile layout. False is linear. True is TileY", true)
825825
DECLARE_IGC_REGKEY(DWORD, MemCpyLoweringUnrollThreshold, 12, "Min number of mem instructions that require non-unrolled loop when lowering memcpy", false)
826-
DECLARE_IGC_REGKEY(DWORD, EnablePrivMemNewSOATranspose, 1, "0 : disable new algo; 1 and up : enable new algo. " \
826+
DECLARE_IGC_REGKEY(DWORD, EnablePrivMemNewSOATranspose, 2, "0 : disable new algo; 1 and up : enable new algo. " \
827827
"1 : enable new algo just for array of struct; " \
828828
"2 : 1 plus new algo for array of dw[xn]/qw[xn],etc " \
829829
"3 : 2 plus new algo for array of complicated struct.", true)

0 commit comments

Comments
 (0)