@@ -57,4 +57,153 @@ body: |
57
57
%4:vgpr_16 = COPY %3:sgpr_lo16
58
58
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
59
59
S_ENDPGM 0, implicit %5
60
+
61
+ # Input: the vgpr_32 result of V_FMA_MIXLO_F16 is copied into a vgpr_16, and
+ # that copy feeds both sources of a V_MAX_F16_t16_e64 whose operand following
+ # the sources is -1. The CHECK lines expect the V_MAX to be gone, the final
+ # $vgpr0 copy to read the FMA result directly, and the FMA operand just before
+ # [[COPY3]] to change from 0 to 1 (presumably the clamp bit, going by the test
+ # name -- confirm against the instruction definition).
+ ---
62
+ name : fold_16bit_madmix_clamp
63
+ tracksRegLiveness : true
64
+ registers :
65
+ body : |
66
+ bb.0:
67
+ liveins: $vgpr0, $vgpr1, $vgpr2
68
+ ; CHECK-LABEL: name: fold_16bit_madmix_clamp
69
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
70
+ ; CHECK-NEXT: {{ $}}
71
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
72
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
73
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
74
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
75
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
76
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
77
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
78
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
79
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
80
+ %0:vgpr_32 = COPY $vgpr2
81
+ %1:vgpr_32 = COPY $vgpr1
82
+ %2:vgpr_32 = COPY $vgpr0
83
+ %3:sreg_32 = IMPLICIT_DEF
84
+ %4:vgpr_32 = COPY %3
85
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
86
+ %6:vgpr_16 = COPY %5
87
+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
88
+ $vgpr0 = COPY %7
89
+ S_ENDPGM 0, implicit $vgpr0
90
+ ...
91
+
92
+ # Input: V_MAX_F16_t16_e64 reads the .lo16 subregister of the V_FMA_MIXLO_F16
+ # result directly for both sources (no intermediate COPY). The CHECK lines
+ # expect the instruction sequence to be unchanged: the V_MAX stays and the FMA
+ # operand just before [[COPY3]] remains 0.
+ ---
93
+ name : fold_16bit_subreg_1_clamp
94
+ tracksRegLiveness : true
95
+ registers :
96
+ body : |
97
+ bb.0:
98
+ liveins: $vgpr0, $vgpr1, $vgpr2
99
+ ; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
100
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
101
+ ; CHECK-NEXT: {{ $}}
102
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
103
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
104
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
105
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
106
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
107
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
108
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
109
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
110
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
111
+ %0:vgpr_32 = COPY $vgpr2
112
+ %1:vgpr_32 = COPY $vgpr1
113
+ %2:vgpr_32 = COPY $vgpr0
114
+ %3:sreg_32 = IMPLICIT_DEF
115
+ %4:vgpr_32 = COPY %3
116
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
117
+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
118
+ $vgpr0 = COPY %6
119
+ S_ENDPGM 0, implicit $vgpr0
120
+ ...
121
+
122
+ # Input: the .lo16 subregister of the V_FMA_MIXLO_F16 result is first copied
+ # into a vgpr_16, and that copy feeds both sources of V_MAX_F16_t16_e64. The
+ # CHECK lines expect the intermediate COPY to disappear, with the V_MAX reading
+ # [[V_FMA_MIXLO_F16_]].lo16 directly; the V_MAX itself stays and the FMA
+ # operand just before [[COPY3]] remains 0.
+ ---
123
+ name : fold_16bit_subreg_2_clamp
124
+ tracksRegLiveness : true
125
+ registers :
126
+ body : |
127
+ bb.0:
128
+ liveins: $vgpr0, $vgpr1, $vgpr2
129
+ ; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
130
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
131
+ ; CHECK-NEXT: {{ $}}
132
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
133
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
134
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
135
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
136
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
137
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
138
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
139
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
140
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
141
+ %0:vgpr_32 = COPY $vgpr2
142
+ %1:vgpr_32 = COPY $vgpr1
143
+ %2:vgpr_32 = COPY $vgpr0
144
+ %3:sreg_32 = IMPLICIT_DEF
145
+ %4:vgpr_32 = COPY %3
146
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
147
+ %6:vgpr_16 = COPY %5.lo16
148
+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
149
+ $vgpr0 = COPY %7
150
+ S_ENDPGM 0, implicit $vgpr0
151
+ ...
152
+
153
+ # Input: the V_FMA_MIXLO_F16 result is copied into the physical register
+ # $vgpr10_lo16, and V_MAX_F16_t16_e64 reads that physical register for both
+ # sources. The CHECK lines expect the instruction sequence to be unchanged:
+ # the physical-register COPY and the V_MAX both stay, and the FMA operand just
+ # before [[COPY3]] remains 0.
+ ---
154
+ name : fold_16bit_phyreg_clamp
155
+ tracksRegLiveness : true
156
+ registers :
157
+ body : |
158
+ bb.0:
159
+ liveins: $vgpr0, $vgpr1, $vgpr2
160
+ ; CHECK-LABEL: name: fold_16bit_phyreg_clamp
161
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
162
+ ; CHECK-NEXT: {{ $}}
163
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
164
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
165
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
166
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
167
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
168
+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
169
+ ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
170
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
171
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
172
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
173
+ %0:vgpr_32 = COPY $vgpr2
174
+ %1:vgpr_32 = COPY $vgpr1
175
+ %2:vgpr_32 = COPY $vgpr0
176
+ %3:sreg_32 = IMPLICIT_DEF
177
+ %4:vgpr_32 = COPY %3
178
+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
179
+ $vgpr10_lo16 = COPY %5
180
+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
181
+ $vgpr0 = COPY %6
182
+ S_ENDPGM 0, implicit $vgpr0
183
+ ...
184
+
185
+ # Input: both sources of V_MAX_F16_t16_e64 are a vgpr_16 defined by
+ # IMPLICIT_DEF rather than by an arithmetic instruction. The CHECK lines expect
+ # the IMPLICIT_DEF and the V_MAX to remain unchanged. The three COPYs of the
+ # live-in registers are not used by any later instruction in this body.
+ ---
186
+ name : fold_16bit_undef_clamp
187
+ tracksRegLiveness : true
188
+ registers :
189
+ body : |
190
+ bb.0:
191
+ liveins: $vgpr0, $vgpr1, $vgpr2
192
+ ; CHECK-LABEL: name: fold_16bit_undef_clamp
193
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
194
+ ; CHECK-NEXT: {{ $}}
195
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
196
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
197
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
198
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
199
+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
200
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
201
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
202
+ %0:vgpr_32 = COPY $vgpr2
203
+ %1:vgpr_32 = COPY $vgpr1
204
+ %2:vgpr_32 = COPY $vgpr0
205
+ %3:vgpr_16 = IMPLICIT_DEF
206
+ %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
207
+ $vgpr0 = COPY %4
208
+ S_ENDPGM 0, implicit $vgpr0
60
209
...
0 commit comments