Skip to content

Commit 30ce11f

Browse files
dguzhaev authored and igcbot committed
Add ConstantCoalescing tests
* Remove obsolete function * Add missing case in ScatterToSampler * Add LIT tests
1 parent 6bc576c commit 30ce11f

File tree

3 files changed

+193
-7
lines changed

3 files changed

+193
-7
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -598,13 +598,6 @@ void ConstantCoalescing::ProcessBlock(
598598
} // loop over inst in block
599599
}
600600

601-
// sort the CB loads with index order
602-
bool sortFunction(BufChunk* buf1, BufChunk* buf2)
603-
{
604-
return (buf1->addrSpace < buf2->addrSpace ||
605-
(buf1->addrSpace == buf2->addrSpace && buf1->chunkStart < buf2->chunkStart));
606-
}
607-
608601
bool ConstantCoalescing::profitableChunkSize(
609602
uint32_t ub, uint32_t lb, uint32_t eltSizeInBytes)
610603
{
@@ -2082,6 +2075,7 @@ bool ConstantCoalescing::IsSamplerAlignedAddress(Value* addr) const
20822075
(inst->getOpcode() == Instruction::Shl ||
20832076
inst->getOpcode() == Instruction::Mul ||
20842077
inst->getOpcode() == Instruction::And ||
2078+
inst->getOpcode() == Instruction::Or ||
20852079
inst->getOpcode() == Instruction::Add))
20862080
{
20872081
ConstantInt* src1ConstVal = dyn_cast<ConstantInt>(inst->getOperand(1));
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt --igc-constant-coalescing -dce -S < %s | FileCheck %s
10+
; ------------------------------------------------
11+
; ConstantCoalescing
12+
; ------------------------------------------------
13+
14+
; Test checks const addr space loads merging:
15+
; This test is for covering getPointerBaseWithConstantOffset function
16+
; which is part of DecomposePtrExp, used to calculate base from bitcast
17+
; or a getelementptr instruction.
18+
19+
; Merge
20+
define void @test_merge(float addrspace(2)* %src) {
21+
; CHECK-LABEL: define void @test_merge(
22+
; CHECK-SAME: float addrspace(2)* [[SRC:%.*]]) {
23+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint float addrspace(2)* [[SRC]] to i64
24+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP1]] to <2 x i32> addrspace(2)*
25+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32> addrspace(2)* [[CHUNKPTR]], align 4
26+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
27+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
28+
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
29+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
30+
; CHECK-NEXT: ret void
31+
;
32+
%1 = bitcast float addrspace(2)* %src to i32 addrspace(2)*
33+
%2 = getelementptr i32, i32 addrspace(2)* %1, i32 1
34+
%3 = load i32, i32 addrspace(2)* %1
35+
%4 = load i32, i32 addrspace(2)* %2
36+
call void @use.i32(i32 %3)
37+
call void @use.i32(i32 %4)
38+
ret void
39+
}
40+
41+
; TODO: check, these are potentially worse
42+
define void @test_vectorize(float addrspace(2)* %src) {
43+
; CHECK-LABEL: define void @test_vectorize(
44+
; CHECK-SAME: float addrspace(2)* [[SRC:%.*]]) {
45+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float addrspace(2)* [[SRC]] to i32 addrspace(2)*
46+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32 addrspace(2)* [[TMP1]], i32 2
47+
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint float addrspace(2)* [[SRC]] to i64
48+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP3]] to <1 x i32> addrspace(2)*
49+
; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i32>, <1 x i32> addrspace(2)* [[CHUNKPTR]], align 4
50+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[TMP4]], i32 0
51+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(2)* [[TMP2]], align 4
52+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
53+
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
54+
; CHECK-NEXT: ret void
55+
;
56+
%1 = bitcast float addrspace(2)* %src to i32 addrspace(2)*
57+
%2 = getelementptr i32, i32 addrspace(2)* %1, i32 2
58+
%3 = load i32, i32 addrspace(2)* %1
59+
%4 = load i32, i32 addrspace(2)* %2
60+
call void @use.i32(i32 %3)
61+
call void @use.i32(i32 %4)
62+
ret void
63+
}
64+
65+
define void @test_nonconst_gep(i32 addrspace(2)* %src, i32 %off) {
66+
; CHECK-LABEL: define void @test_nonconst_gep(
67+
; CHECK-SAME: i32 addrspace(2)* [[SRC:%.*]], i32 [[OFF:%.*]]) {
68+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32 addrspace(2)* [[SRC]], i32 [[OFF]]
69+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(2)* [[TMP1]], align 4
70+
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32 addrspace(2)* [[TMP1]] to i64
71+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
72+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP4]] to <1 x i32> addrspace(2)*
73+
; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i32>, <1 x i32> addrspace(2)* [[CHUNKPTR]], align 4
74+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i32> [[TMP5]], i32 0
75+
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
76+
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
77+
; CHECK-NEXT: ret void
78+
;
79+
%1 = getelementptr i32, i32 addrspace(2)* %src, i32 %off
80+
%2 = getelementptr i32, i32 addrspace(2)* %1, i32 1
81+
%3 = load i32, i32 addrspace(2)* %1
82+
%4 = load i32, i32 addrspace(2)* %2
83+
call void @use.i32(i32 %3)
84+
call void @use.i32(i32 %4)
85+
ret void
86+
}
87+
88+
declare void @use.i32(i32)
89+
90+
!igc.functions = !{!0, !4, !5}
91+
92+
!0 = !{void (float addrspace(2)*)* @test_merge, !1}
93+
!1 = !{!2, !3}
94+
!2 = !{!"function_type", i32 0}
95+
!3 = !{!"implicit_arg_desc"}
96+
!4 = !{void (float addrspace(2)*)* @test_vectorize, !1}
97+
!5 = !{void (i32 addrspace(2)*, i32)* @test_nonconst_gep, !1}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: llvm-14-plus
9+
; RUN: igc_opt --opaque-pointers --igc-constant-coalescing -dce -S < %s | FileCheck %s
10+
; ------------------------------------------------
11+
; ConstantCoalescing
12+
; ------------------------------------------------
13+
14+
; Test checks const addr space loads merging:
15+
; This test is for covering getPointerBaseWithConstantOffset function
16+
; which is part of DecomposePtrExp, used to calculate base from bitcast
17+
; or a getelementptr instruction.
18+
19+
; TODO: check, with opaque pointers on, pass doesn't optimize test_merge case
20+
21+
define void @test_merge(ptr addrspace(2) %src) {
22+
; CHECK-LABEL: define void @test_merge(
23+
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]]) {
24+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[SRC]], align 4
25+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[SRC]] to i64
26+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 4
27+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
28+
; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
29+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[TMP4]], i32 0
30+
; CHECK-NEXT: call void @use.i32(i32 [[TMP1]])
31+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
32+
; CHECK-NEXT: ret void
33+
;
34+
%1 = getelementptr i32, ptr addrspace(2) %src, i32 1
35+
%2 = load i32, ptr addrspace(2) %src, align 4
36+
%3 = load i32, ptr addrspace(2) %1, align 4
37+
call void @use.i32(i32 %2)
38+
call void @use.i32(i32 %3)
39+
ret void
40+
}
41+
42+
define void @test_vectorize(ptr addrspace(2) %src) {
43+
; CHECK-LABEL: define void @test_vectorize(
44+
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]]) {
45+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[SRC]], align 4
46+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[SRC]] to i64
47+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 8
48+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
49+
; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
50+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[TMP4]], i32 0
51+
; CHECK-NEXT: call void @use.i32(i32 [[TMP1]])
52+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
53+
; CHECK-NEXT: ret void
54+
;
55+
%1 = getelementptr i32, ptr addrspace(2) %src, i32 2
56+
%2 = load i32, ptr addrspace(2) %src, align 4
57+
%3 = load i32, ptr addrspace(2) %1, align 4
58+
call void @use.i32(i32 %2)
59+
call void @use.i32(i32 %3)
60+
ret void
61+
}
62+
63+
define void @test_nonconst_gep(ptr addrspace(2) %src, i32 %off) {
64+
; CHECK-LABEL: define void @test_nonconst_gep(
65+
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]], i32 [[OFF:%.*]]) {
66+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(2) [[SRC]], i32 [[OFF]]
67+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(2) [[TMP1]], align 4
68+
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(2) [[TMP1]] to i64
69+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
70+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(2)
71+
; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
72+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i32> [[TMP5]], i32 0
73+
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
74+
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
75+
; CHECK-NEXT: ret void
76+
;
77+
%1 = getelementptr i32, ptr addrspace(2) %src, i32 %off
78+
%2 = getelementptr i32, ptr addrspace(2) %1, i32 1
79+
%3 = load i32, ptr addrspace(2) %1, align 4
80+
%4 = load i32, ptr addrspace(2) %2, align 4
81+
call void @use.i32(i32 %3)
82+
call void @use.i32(i32 %4)
83+
ret void
84+
}
85+
86+
declare void @use.i32(i32)
87+
88+
!igc.functions = !{!0, !4, !5}
89+
90+
!0 = !{ptr @test_merge, !1}
91+
!1 = !{!2, !3}
92+
!2 = !{!"function_type", i32 0}
93+
!3 = !{!"implicit_arg_desc"}
94+
!4 = !{ptr @test_vectorize, !1}
95+
!5 = !{ptr @test_nonconst_gep, !1}

0 commit comments

Comments
 (0)