1
1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2
- # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr="+wavefrontsize32",+ real-true16 -verify-machineinstrs -o - %s | FileCheck %s
2
+ # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr=+ real-true16 -o - %s | FileCheck %s
3
3
4
4
---
5
5
name : fold_16bit_madmix_clamp
6
6
tracksRegLiveness : true
7
7
registers :
8
8
body : |
9
- bb.0.entry :
9
+ bb.0:
10
10
liveins: $vgpr0, $vgpr1, $vgpr2
11
11
; CHECK-LABEL: name: fold_16bit_madmix_clamp
12
12
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -16,18 +16,139 @@ body: |
16
16
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
17
17
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
18
18
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
19
- ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1 , [[COPY3]], 0, 0, implicit $mode, implicit $exec
19
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0 , [[COPY3]], 0, 0, implicit $mode, implicit $exec
20
20
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21
- ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
21
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
22
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
22
23
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
23
- %10:vgpr_32 = COPY $vgpr2
24
- %9:vgpr_32 = COPY $vgpr1
25
- %8:vgpr_32 = COPY $vgpr0
26
- %12:sreg_32 = IMPLICIT_DEF
27
- %13:vgpr_32 = COPY %12:sreg_32
28
- %11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
29
- %15:vgpr_16 = COPY %11:vgpr_32
30
- %14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
31
- $vgpr0 = COPY %14:vgpr_16
24
+ %0:vgpr_32 = COPY $vgpr2
25
+ %1:vgpr_32 = COPY $vgpr1
26
+ %2:vgpr_32 = COPY $vgpr0
27
+ %3:sreg_32 = IMPLICIT_DEF
28
+ %4:vgpr_32 = COPY %3
29
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
30
+ %6:vgpr_16 = COPY %5
31
+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
32
+ $vgpr0 = COPY %7
33
+ S_ENDPGM 0, implicit $vgpr0
34
+ ...
35
+
36
+ ---
37
+ name : fold_16bit_subreg_folded_clamp
38
+ tracksRegLiveness : true
39
+ registers :
40
+ body : |
41
+ bb.0:
42
+ liveins: $vgpr0, $vgpr1, $vgpr2
43
+ ; CHECK-LABEL: name: fold_16bit_madmix_clamp
44
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
45
+ ; CHECK-NEXT: {{ $}}
46
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
47
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
48
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
49
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
50
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
51
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
52
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
53
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
54
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
55
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
56
+ %0:vgpr_32 = COPY $vgpr2
57
+ %1:vgpr_32 = COPY $vgpr1
58
+ %2:vgpr_32 = COPY $vgpr0
59
+ %3:sreg_32 = IMPLICIT_DEF
60
+ %4:vgpr_32 = COPY %3
61
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
62
+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
63
+ $vgpr0 = COPY %6
64
+ S_ENDPGM 0, implicit $vgpr0
65
+ ...
66
+
67
+ ---
68
+ name : fold_16bit_subreg_clamp
69
+ tracksRegLiveness : true
70
+ registers :
71
+ body : |
72
+ bb.0:
73
+ liveins: $vgpr0, $vgpr1, $vgpr2
74
+ ; CHECK-LABEL: name: fold_16bit_subreg_clamp
75
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
76
+ ; CHECK-NEXT: {{ $}}
77
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
78
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
79
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
80
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
81
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
82
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
83
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
84
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
85
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
86
+ %0:vgpr_32 = COPY $vgpr2
87
+ %1:vgpr_32 = COPY $vgpr1
88
+ %2:vgpr_32 = COPY $vgpr0
89
+ %3:sreg_32 = IMPLICIT_DEF
90
+ %4:vgpr_32 = COPY %3
91
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
92
+ %6:vgpr_16 = COPY %5.lo16
93
+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
94
+ $vgpr0 = COPY %7
95
+ S_ENDPGM 0, implicit $vgpr0
96
+ ...
97
+
98
+ ---
99
+ name : fold_16bit_phyreg_clamp
100
+ tracksRegLiveness : true
101
+ registers :
102
+ body : |
103
+ bb.0:
104
+ liveins: $vgpr0, $vgpr1, $vgpr2
105
+ ; CHECK-LABEL: name: fold_16bit_phyreg_clamp
106
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
107
+ ; CHECK-NEXT: {{ $}}
108
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
109
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
110
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
112
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
113
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
114
+ ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
115
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
116
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
117
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
118
+ %0:vgpr_32 = COPY $vgpr2
119
+ %1:vgpr_32 = COPY $vgpr1
120
+ %2:vgpr_32 = COPY $vgpr0
121
+ %3:sreg_32 = IMPLICIT_DEF
122
+ %4:vgpr_32 = COPY %3
123
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
124
+ $vgpr10_lo16 = COPY %5
125
+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
126
+ $vgpr0 = COPY %6
127
+ S_ENDPGM 0, implicit $vgpr0
128
+ ...
129
+
130
+ ---
131
+ name : fold_16bit_undef_clamp
132
+ tracksRegLiveness : true
133
+ registers :
134
+ body : |
135
+ bb.0:
136
+ liveins: $vgpr0, $vgpr1, $vgpr2
137
+ ; CHECK-LABEL: name: fold_16bit_undef_clamp
138
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
139
+ ; CHECK-NEXT: {{ $}}
140
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
141
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
142
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
143
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
144
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
145
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
146
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
147
+ %0:vgpr_32 = COPY $vgpr2
148
+ %1:vgpr_32 = COPY $vgpr1
149
+ %2:vgpr_32 = COPY $vgpr0
150
+ %3:vgpr_16 = IMPLICIT_DEF
151
+ %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
152
+ $vgpr0 = COPY %4
32
153
S_ENDPGM 0, implicit $vgpr0
33
154
...
0 commit comments