Skip to content

Commit ada2fbf

Browse files
authored
[GISel] Fix ShuffleVector assert (llvm#139769)
Fixes issue: llvm#139752. When G_SHUFFLE_VECTOR produces only 1 element, the result vector may have decayed into a scalar.
1 parent 744a469 commit ada2fbf

File tree

3 files changed

+84
-22
lines changed

3 files changed

+84
-22
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,11 @@ void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
420420
else
421421
Extracts.push_back(Unmerge2.getReg(Val - Width));
422422
}
423-
424-
Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
423+
assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
424+
if (Extracts.size() == 1)
425+
Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
426+
else
427+
Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
425428
MI.eraseFromParent();
426429
}
427430

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Description: an end-to-end IR test for https://github.com/llvm/llvm-project/issues/139752
5+
; Tests combining shuffle_vector into build_vector.
6+
7+
define amdgpu_gs <4 x float> @_amdgpu_gs_main() {
8+
; CHECK-LABEL: _amdgpu_gs_main:
9+
; CHECK: ; %bb.0: ; %bb
10+
; CHECK-NEXT: v_mov_b32_e32 v0, 16
11+
; CHECK-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
12+
; CHECK-NEXT: s_mov_b32 s0, 0
13+
; CHECK-NEXT: s_mov_b32 s1, s0
14+
; CHECK-NEXT: s_mov_b32 s2, s0
15+
; CHECK-NEXT: s_mov_b32 s3, s0
16+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
17+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v2, s[0:3], 0 idxen
19+
; CHECK-NEXT: s_nop 0
20+
; CHECK-NEXT: v_mov_b32_e32 v0, v1
21+
; CHECK-NEXT: s_waitcnt vmcnt(0)
22+
; CHECK-NEXT: ; return to shader part epilog
23+
bb:
24+
%i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 16), align 4
25+
%i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 20), align 4
26+
%i2 = shufflevector <1 x float> %i, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
27+
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i2, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
28+
%i3 = shufflevector <1 x float> %i1, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
29+
ret <4 x float> %i3
30+
}
31+
32+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
33+
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #0
34+
35+

llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ body: |
2020
; CHECK-NEXT: SI_RETURN
2121
%0:_(p3) = COPY $vgpr0
2222
%1:_(p3) = COPY $vgpr1
23-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
24-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
25-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, 6, 7)
26-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
23+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
24+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
25+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, 6, 7)
26+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
2727
SI_RETURN
2828
...
2929

@@ -46,10 +46,10 @@ body: |
4646
; CHECK-NEXT: SI_RETURN
4747
%0:_(p3) = COPY $vgpr0
4848
%1:_(p3) = COPY $vgpr1
49-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
50-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
51-
%11:_(<2 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(3, 4)
52-
G_STORE %11(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
49+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
50+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
51+
%4:_(<2 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(3, 4)
52+
G_STORE %4(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
5353
SI_RETURN
5454
5555
...
@@ -73,10 +73,10 @@ body: |
7373
; CHECK-NEXT: SI_RETURN
7474
%0:_(p3) = COPY $vgpr0
7575
%1:_(p3) = COPY $vgpr1
76-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
77-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
78-
%11:_(<3 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(0, 1, 2)
79-
G_STORE %11(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
76+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
77+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
78+
%4:_(<3 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(0, 1, 2)
79+
G_STORE %4(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
8080
SI_RETURN
8181
...
8282

@@ -101,10 +101,10 @@ body: |
101101
; CHECK-NEXT: SI_RETURN
102102
%0:_(p3) = COPY $vgpr0
103103
%1:_(p3) = COPY $vgpr1
104-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
105-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
106-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, -1, 7)
107-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
104+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
105+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
106+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, -1, 7)
107+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
108108
SI_RETURN
109109
...
110110

@@ -128,10 +128,34 @@ body: |
128128
; CHECK-NEXT: SI_RETURN
129129
%0:_(p3) = COPY $vgpr0
130130
%1:_(p3) = COPY $vgpr1
131-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
132-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
133-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(6, 7, 8, 9)
134-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
131+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
132+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
133+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(6, 7, 8, 9)
134+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
135135
SI_RETURN
136136
...
137137

138+
139+
---
140+
name: shuffle_vector_to_copy
141+
tracksRegLiveness: true
142+
body: |
143+
bb.0:
144+
liveins: $vgpr0, $vgpr1
145+
; CHECK-LABEL: name: shuffle_vector_to_copy
146+
; CHECK: liveins: $vgpr0, $vgpr1
147+
; CHECK-NEXT: {{ $}}
148+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
149+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
150+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
151+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
152+
; CHECK-NEXT: G_STORE [[UV4]](s16), [[COPY1]](p3) :: (store (s16), addrspace 3)
153+
; CHECK-NEXT: SI_RETURN
154+
%0:_(p3) = COPY $vgpr0
155+
%1:_(p3) = COPY $vgpr1
156+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
157+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
158+
%4:_(s16) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4)
159+
G_STORE %4(s16), %1(p3) :: (store (s16), addrspace 3)
160+
SI_RETURN
161+
...

0 commit comments

Comments
 (0)