Skip to content

Commit 0ef3e8c

Browse files
committed
[LSV] Fix codegen tests
Also, avoid merging the same class into itself.
Change-Id: Ib64fd98de5c908262947648ad14dc53b61814642
1 parent f347eb6 commit 0ef3e8c

36 files changed

+1573
-1573
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,8 @@ class Vectorizer {
325325
Instruction *ChainElem, Instruction *ChainBegin,
326326
const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
327327

328-
/// Merge the equivalence classes if casts could be inserted in one to match
329-
/// the scalar bitwidth of the instructions in the other class.
328+
/// Merge equivalence classes if casts could be inserted in one to match
329+
/// the total bitwidth of the instructions.
330330
void insertCastsToMergeClasses(EquivalenceClassMap &EQClasses);
331331

332332
/// Merges the equivalence classes if they have underlying objects that differ
@@ -1346,7 +1346,10 @@ void Vectorizer::insertCastsToMergeClasses(EquivalenceClassMap &EQClasses) {
13461346
DenseSet<EqClassKey> ClassesToErase;
13471347
for (auto EC1 : EQClasses) {
13481348
for (auto EC2 : EQClasses) {
1349-
if (ClassesToErase.contains(EC2.first) || EC1 <= EC2)
1349+
// Skip if EC2 was already merged before, EC1 follows EC2 in the
1350+
// collection or EC1 is the same as EC2.
1351+
if (ClassesToErase.contains(EC2.first) || EC1 <= EC2 ||
1352+
EC1.first == EC2.first)
13501353
continue;
13511354

13521355
auto [Ptr1, AS1, TySize1, IsLoad1] = EC1.first;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 474 additions & 460 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/bitop3.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,10 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
113113
}
114114

115115
define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
116-
; GFX950-SDAG-LABEL: and_and_and:
117-
; GFX950-SDAG: ; %bb.0:
118-
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
119-
; GFX950-SDAG-NEXT: ; return to shader part epilog
120-
;
121-
; GFX950-GISEL-LABEL: and_and_and:
122-
; GFX950-GISEL: ; %bb.0:
123-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
124-
; GFX950-GISEL-NEXT: ; return to shader part epilog
116+
; GCN-LABEL: and_and_and:
117+
; GCN: ; %bb.0:
118+
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
119+
; GCN-NEXT: ; return to shader part epilog
125120
%and1 = and i32 %a, %c
126121
%and2 = and i32 %and1, %b
127122
%ret_cast = bitcast i32 %and2 to float
@@ -131,15 +126,10 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
131126
; ========= Multi bit functions =========
132127

133128
define amdgpu_ps float @test_12(i32 %a, i32 %b) {
134-
; GFX950-SDAG-LABEL: test_12:
135-
; GFX950-SDAG: ; %bb.0:
136-
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
137-
; GFX950-SDAG-NEXT: ; return to shader part epilog
138-
;
139-
; GFX950-GISEL-LABEL: test_12:
140-
; GFX950-GISEL: ; %bb.0:
141-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
142-
; GFX950-GISEL-NEXT: ; return to shader part epilog
129+
; GCN-LABEL: test_12:
130+
; GCN: ; %bb.0:
131+
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
132+
; GCN-NEXT: ; return to shader part epilog
143133
%nota = xor i32 %a, -1
144134
%and1 = and i32 %nota, %b
145135
%ret_cast = bitcast i32 %and1 to float

llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll

Lines changed: 72 additions & 73 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/build_vector.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,13 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
271271
; GFX8: ; %bb.0: ; %entry
272272
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
273273
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
274-
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
275-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
276-
; GFX8-NEXT: v_mov_b32_e32 v3, s1
277-
; GFX8-NEXT: v_mov_b32_e32 v0, s2
278-
; GFX8-NEXT: v_mov_b32_e32 v1, s3
279-
; GFX8-NEXT: v_mov_b32_e32 v2, s0
280-
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
274+
; GFX8-NEXT: v_mov_b32_e32 v0, s0
275+
; GFX8-NEXT: v_mov_b32_e32 v1, s1
276+
; GFX8-NEXT: s_lshl_b32 s0, s3, 16
277+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
278+
; GFX8-NEXT: v_mov_b32_e32 v2, s1
279+
; GFX8-NEXT: v_mov_b32_e32 v3, s0
280+
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
281281
; GFX8-NEXT: s_endpgm
282282
;
283283
; GFX10-LABEL: build_v2i32_from_v4i16_shuffle:

llvm/test/CodeGen/AMDGPU/divrem24-assume.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
55
; CHECK-LABEL: @divrem24_assume(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
7+
; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG0:![0-9]+]]
88
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ARG1:%.*]], 42
99
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
1010
; CHECK-NEXT: [[TMP0:%.*]] = uitofp i32 [[TMP]] to float

llvm/test/CodeGen/AMDGPU/fabs.f16.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,13 @@ define amdgpu_kernel void @s_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
218218
; CI-NEXT: s_mov_b32 flat_scratch_lo, s13
219219
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
220220
; CI-NEXT: s_waitcnt lgkmcnt(0)
221-
; CI-NEXT: s_and_b32 s3, s3, 0x7fff7fff
222-
; CI-NEXT: s_and_b32 s2, s2, 0x7fff7fff
223-
; CI-NEXT: v_mov_b32_e32 v3, s1
224-
; CI-NEXT: v_mov_b32_e32 v0, s2
225-
; CI-NEXT: v_mov_b32_e32 v1, s3
226-
; CI-NEXT: v_mov_b32_e32 v2, s0
227-
; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
221+
; CI-NEXT: v_mov_b32_e32 v0, s0
222+
; CI-NEXT: v_mov_b32_e32 v1, s1
223+
; CI-NEXT: s_and_b32 s0, s3, 0x7fff7fff
224+
; CI-NEXT: s_and_b32 s1, s2, 0x7fff7fff
225+
; CI-NEXT: v_mov_b32_e32 v2, s1
226+
; CI-NEXT: v_mov_b32_e32 v3, s0
227+
; CI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
228228
; CI-NEXT: s_endpgm
229229
;
230230
; VI-LABEL: s_fabs_v4f16:
@@ -234,13 +234,13 @@ define amdgpu_kernel void @s_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
234234
; VI-NEXT: s_mov_b32 flat_scratch_lo, s13
235235
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
236236
; VI-NEXT: s_waitcnt lgkmcnt(0)
237-
; VI-NEXT: s_and_b32 s3, s3, 0x7fff7fff
238-
; VI-NEXT: s_and_b32 s2, s2, 0x7fff7fff
239-
; VI-NEXT: v_mov_b32_e32 v3, s1
240-
; VI-NEXT: v_mov_b32_e32 v0, s2
241-
; VI-NEXT: v_mov_b32_e32 v1, s3
242-
; VI-NEXT: v_mov_b32_e32 v2, s0
243-
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
237+
; VI-NEXT: v_mov_b32_e32 v0, s0
238+
; VI-NEXT: v_mov_b32_e32 v1, s1
239+
; VI-NEXT: s_and_b32 s0, s3, 0x7fff7fff
240+
; VI-NEXT: s_and_b32 s1, s2, 0x7fff7fff
241+
; VI-NEXT: v_mov_b32_e32 v2, s1
242+
; VI-NEXT: v_mov_b32_e32 v3, s0
243+
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
244244
; VI-NEXT: s_endpgm
245245
;
246246
; GFX9-LABEL: s_fabs_v4f16:

llvm/test/CodeGen/AMDGPU/fabs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,13 @@ define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
115115
; VI: ; %bb.0:
116116
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
117117
; VI-NEXT: s_waitcnt lgkmcnt(0)
118-
; VI-NEXT: s_bitset0_b32 s3, 31
119-
; VI-NEXT: s_bitset0_b32 s2, 31
120-
; VI-NEXT: v_mov_b32_e32 v3, s1
121-
; VI-NEXT: v_mov_b32_e32 v0, s2
122-
; VI-NEXT: v_mov_b32_e32 v1, s3
123-
; VI-NEXT: v_mov_b32_e32 v2, s0
124-
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
118+
; VI-NEXT: v_mov_b32_e32 v0, s0
119+
; VI-NEXT: v_mov_b32_e32 v1, s1
120+
; VI-NEXT: s_and_b32 s0, s3, 0x7fffffff
121+
; VI-NEXT: s_and_b32 s1, s2, 0x7fffffff
122+
; VI-NEXT: v_mov_b32_e32 v2, s1
123+
; VI-NEXT: v_mov_b32_e32 v3, s0
124+
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
125125
; VI-NEXT: s_endpgm
126126
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
127127
store <2 x float> %fabs, ptr addrspace(1) %out

0 commit comments

Comments
 (0)