Skip to content

Commit 0247a75

Browse files
authored
AMDGPU: Add some tests for folding immediates into subregister uses (#129663)
1 parent 54ad114 commit 0247a75

File tree

2 files changed

+163
-0
lines changed

2 files changed

+163
-0
lines changed

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,37 @@ body: |
393393
SI_RETURN implicit $vgpr0, implicit $vgpr1
394394
395395
...
396+
397+
---
# %3 is a REG_SEQUENCE of the immediate 123 (sub0) and the frame index
# %stack.0 (sub1). The two S_ADD_I32 users read %3.sub1 and %3.sub0; the
# CHECK lines expect %stack.0 and 123 respectively to be folded into them,
# while the S_MOV_B32 and REG_SEQUENCE defs remain in the output.
398+
name: fold_frame_index__through_reg_sequence_to_user_subreg
399+
tracksRegLiveness: true
400+
frameInfo:
401+
maxAlignment: 4
402+
localFrameSize: 16384
403+
stack:
404+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
405+
body: |
406+
bb.0:
407+
liveins: $sgpr8
408+
; CHECK-LABEL: name: fold_frame_index__through_reg_sequence_to_user_subreg
409+
; CHECK: liveins: $sgpr8
410+
; CHECK-NEXT: {{ $}}
411+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
412+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
413+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
414+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
415+
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def $scc
416+
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], 123, implicit-def $scc
417+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
418+
; CHECK-NEXT: $sgpr5 = COPY [[S_ADD_I32_1]]
419+
; CHECK-NEXT: SI_RETURN implicit $sgpr4, implicit $sgpr5
420+
%0:sreg_32 = COPY $sgpr8
421+
%1:sreg_32 = S_MOV_B32 123
422+
%2:sreg_32 = S_MOV_B32 %stack.0
423+
%3:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
424+
%4:sreg_32 = S_ADD_I32 %0, %3.sub1, implicit-def $scc
425+
%5:sreg_32 = S_ADD_I32 %0, %3.sub0, implicit-def $scc
426+
$sgpr4 = COPY %4
427+
$sgpr5 = COPY %5
428+
SI_RETURN implicit $sgpr4, implicit $sgpr5
429+
...
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=si-fold-operands -o - %s | FileCheck %s
3+
4+
# Make sure materializations of 64-bit immediates fold the correct value
5+
# into subregister uses.
6+
7+
---
# %2 is S_MOV_B64 8 and is read through %2.sub0 and %2.sub1. The CHECK lines
# currently record 8 being folded into both the S_ADD_U32 and the S_ADDC_U32
# user, and the S_MOV_B64 no longer appears in the output.
# NOTE(review): going by the test name, the 8 folded into the sub1 use is the
# wrong value (presumably it should be 0, the high half of 8) -- confirm.
8+
name: s_mov_b64_sub1_folds_wrong_value_0
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
liveins: $sgpr8_sgpr9
13+
14+
; CHECK-LABEL: name: s_mov_b64_sub1_folds_wrong_value_0
15+
; CHECK: liveins: $sgpr8_sgpr9
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
18+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
19+
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[REG_SEQUENCE]].sub0, 8, implicit-def $scc
20+
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 8, implicit-def $scc, implicit $scc
21+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
22+
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
23+
%0:sgpr_64 = COPY $sgpr8_sgpr9
24+
%1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
25+
%2:sreg_64 = S_MOV_B64 8
26+
%3:sreg_32 = S_ADD_U32 %1.sub0, %2.sub0, implicit-def $scc
27+
%4:sreg_32 = S_ADDC_U32 %1.sub1, %2.sub1, implicit-def $scc, implicit $scc
28+
%5:sreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
29+
S_ENDPGM 0, implicit %5
30+
31+
...
32+
33+
---
# %2 is V_MOV_B64_PSEUDO 30064771075 (0x700000003) and is read through
# %2.sub0 and %2.sub1. The CHECK lines currently record the full 64-bit
# literal being folded into both the V_ADD_CO_U32_e64 and the V_ADDC_U32_e64
# user.
# NOTE(review): going by the test name this is the wrong value; presumably
# the sub0 use should get 3 and the sub1 use 7 -- confirm.
34+
name: v_mov_b64_pseudo_sub1_folds_wrong_value
35+
tracksRegLiveness: true
36+
body: |
37+
bb.0:
38+
liveins: $vgpr8_vgpr9
39+
40+
; CHECK-LABEL: name: v_mov_b64_pseudo_sub1_folds_wrong_value
41+
; CHECK: liveins: $vgpr8_vgpr9
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
44+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
45+
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 30064771075, 0, implicit $exec
46+
; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 30064771075, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
47+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
48+
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
49+
%0:vreg_64 = COPY $vgpr8_vgpr9
50+
%1:vreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
51+
%2:vreg_64 = V_MOV_B64_PSEUDO 30064771075, implicit $exec
52+
%3:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %2.sub0, 0, implicit $exec
53+
%4:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %1.sub1, %2.sub1, %6, 0, implicit $exec
54+
%5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
55+
S_ENDPGM 0, implicit %5
56+
57+
...
58+
59+
---
# No immediate is defined in this function: %1 is a REG_SEQUENCE of the two
# halves of %0 and %2 is a COPY of %0 into vreg_64. The CHECK lines expect
# V_ADD_CO_U32_e64 to keep reading [[REG_SEQUENCE]].sub1 and [[COPY1]].sub1,
# i.e. the instructions in the output match the input.
60+
name: subreg_fold_imm
61+
tracksRegLiveness: true
62+
body: |
63+
bb.0:
64+
liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
65+
66+
; CHECK-LABEL: name: subreg_fold_imm
67+
; CHECK: liveins: $sgpr8_sgpr9, $vgpr8_vgpr9
68+
; CHECK-NEXT: {{ $}}
69+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
70+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
71+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
72+
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub1, [[COPY1]].sub1, 0, implicit $exec
73+
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
74+
%0:sgpr_64 = COPY $sgpr8_sgpr9
75+
%1:sreg_64 = REG_SEQUENCE %0.sub0, %subreg.sub0, %0.sub1, %subreg.sub1
76+
%2:vreg_64 = COPY %0
77+
%3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub1, %2.sub1, 0, implicit $exec
78+
S_ENDPGM 0, implicit %3
79+
80+
...
81+
82+
---
# %2 is a REG_SEQUENCE that swaps the halves of S_MOV_B64 8 (%1.sub1 goes to
# sub0, %1.sub0 goes to sub1), and S_AND_B64 reads the whole of %2. The CHECK
# lines expect no folding here: S_AND_B64 still uses [[REG_SEQUENCE]] and the
# S_MOV_B64 stays in place.
83+
name: s_mov_b64_into_reg_sequence_user
84+
tracksRegLiveness: true
85+
body: |
86+
bb.0:
87+
liveins: $sgpr8_sgpr9
88+
89+
; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user
90+
; CHECK: liveins: $sgpr8_sgpr9
91+
; CHECK-NEXT: {{ $}}
92+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
93+
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
94+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
95+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
96+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
97+
%0:sgpr_64 = COPY $sgpr8_sgpr9
98+
%1:sreg_64 = S_MOV_B64 8
99+
%2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
100+
%3:sreg_64 = S_AND_B64 %0, %2, implicit-def $scc
101+
S_ENDPGM 0, implicit %3
102+
103+
...
104+
105+
---
# %2 is a REG_SEQUENCE that swaps the halves of S_MOV_B64 8 (%1.sub1 goes to
# sub0, %1.sub0 goes to sub1); it is read through %2.sub0 and %2.sub1. The
# CHECK lines currently record 8 being folded into both the S_ADD_U32 and the
# S_ADDC_U32 user, with the S_MOV_B64 and REG_SEQUENCE left in place.
# NOTE(review): since the halves are swapped, the two uses presumably should
# not both see 8 -- confirm whether this records a known mis-fold.
106+
name: s_mov_b64_into_reg_sequence_user_with_subregs
107+
tracksRegLiveness: true
108+
body: |
109+
bb.0:
110+
liveins: $sgpr8_sgpr9
111+
112+
; CHECK-LABEL: name: s_mov_b64_into_reg_sequence_user_with_subregs
113+
; CHECK: liveins: $sgpr8_sgpr9
114+
; CHECK-NEXT: {{ $}}
115+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
116+
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
117+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
118+
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 8, implicit-def $scc
119+
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 8, implicit-def $scc, implicit $scc
120+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
121+
%0:sgpr_64 = COPY $sgpr8_sgpr9
122+
%1:sreg_64 = S_MOV_B64 8
123+
%2:sreg_64 = REG_SEQUENCE %1.sub1, %subreg.sub0, %1.sub0, %subreg.sub1
124+
%3:sreg_32 = S_ADD_U32 %0.sub0, %2.sub0, implicit-def $scc
125+
%4:sreg_32 = S_ADDC_U32 %0.sub1, %2.sub1, implicit-def $scc, implicit $scc
126+
S_ENDPGM 0, implicit %3, implicit %4
127+
128+
...
129+

0 commit comments

Comments
 (0)