Skip to content

Commit a419666

Browse files
authored
[AMDGPU] Revert "Preliminary patch for divergence driven instruction selection. Operands Folding 1." (#71710)
This reverts commit 201f892.
1 parent e6eda66 commit a419666

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+8424
-8903
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -717,24 +717,6 @@ void SIFoldOperands::foldOperand(
717717

718718
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
719719
if (!DestReg.isPhysical()) {
720-
if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
721-
SmallVector<FoldCandidate, 4> CopyUses;
722-
for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
723-
// There's no point trying to fold into an implicit operand.
724-
if (Use.isImplicit())
725-
continue;
726-
727-
CopyUses.emplace_back(Use.getParent(),
728-
Use.getParent()->getOperandNo(&Use),
729-
&UseMI->getOperand(1));
730-
}
731-
732-
for (auto &F : CopyUses) {
733-
foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
734-
CopiesToReplace);
735-
}
736-
}
737-
738720
if (DestRC == &AMDGPU::AGPR_32RegClass &&
739721
TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
740722
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
165165
; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat:
166166
; GFX7: ; %bb.0:
167167
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168-
; GFX7-NEXT: s_movk_i32 s4, 0xffc0
169-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0
170-
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v1
168+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v0
169+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, 0xffffffc0, v1
171170
; GFX7-NEXT: s_setpc_b64 s[30:31]
172171
;
173172
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
@@ -180,10 +179,10 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
180179
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
181180
; GFX8: ; %bb.0:
182181
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183-
; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffc0
184-
; GFX8-NEXT: v_add_u16_e32 v1, 0xffc0, v0
185-
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
186-
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
182+
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
183+
; GFX8-NEXT: v_add_u16_e32 v2, 0xffc0, v0
184+
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
185+
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
187186
; GFX8-NEXT: s_setpc_b64 s[30:31]
188187
;
189188
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat:

llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll

Lines changed: 231 additions & 231 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll

Lines changed: 83 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,13 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
126126
; GFX6-LABEL: v_pow_v2f32:
127127
; GFX6: ; %bb.0:
128128
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129-
; GFX6-NEXT: s_mov_b32 s4, 0x800000
130-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x4f800000
131-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
132-
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
133-
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
134-
; GFX6-NEXT: v_mov_b32_e32 v5, 0x800000
135-
; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
136-
; GFX6-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
129+
; GFX6-NEXT: v_mov_b32_e32 v4, 0x800000
130+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x4f800000
131+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
132+
; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
133+
; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
134+
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
135+
; GFX6-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
137136
; GFX6-NEXT: v_log_f32_e32 v0, v0
138137
; GFX6-NEXT: v_mul_f32_e32 v1, v1, v4
139138
; GFX6-NEXT: v_log_f32_e32 v1, v1
@@ -142,15 +141,15 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
142141
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v7
143142
; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
144143
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
145-
; GFX6-NEXT: s_mov_b32 s6, 0xc2fc0000
144+
; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
146145
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v5
147-
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
148-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
146+
; GFX6-NEXT: v_mov_b32_e32 v7, 0x42800000
147+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
149148
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
150-
; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
151-
; GFX6-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
152-
; GFX6-NEXT: v_add_f32_e32 v0, v0, v7
153-
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
149+
; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
150+
; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
151+
; GFX6-NEXT: v_add_f32_e32 v0, v0, v8
152+
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
154153
; GFX6-NEXT: v_exp_f32_e32 v0, v0
155154
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
156155
; GFX6-NEXT: v_exp_f32_e32 v1, v1
@@ -164,14 +163,13 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
164163
; GFX8-LABEL: v_pow_v2f32:
165164
; GFX8: ; %bb.0:
166165
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167-
; GFX8-NEXT: s_mov_b32 s4, 0x800000
168-
; GFX8-NEXT: v_mov_b32_e32 v4, 0x4f800000
169-
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
170-
; GFX8-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
171-
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v5
172-
; GFX8-NEXT: v_mov_b32_e32 v5, 0x800000
173-
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
174-
; GFX8-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
166+
; GFX8-NEXT: v_mov_b32_e32 v4, 0x800000
167+
; GFX8-NEXT: v_mov_b32_e32 v5, 0x4f800000
168+
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
169+
; GFX8-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
170+
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
171+
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v6
172+
; GFX8-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
175173
; GFX8-NEXT: v_log_f32_e32 v0, v0
176174
; GFX8-NEXT: v_mul_f32_e32 v1, v1, v4
177175
; GFX8-NEXT: v_log_f32_e32 v1, v1
@@ -180,15 +178,15 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
180178
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v7
181179
; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
182180
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
183-
; GFX8-NEXT: s_mov_b32 s6, 0xc2fc0000
181+
; GFX8-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
184182
; GFX8-NEXT: v_sub_f32_e32 v1, v1, v5
185-
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
186-
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
183+
; GFX8-NEXT: v_mov_b32_e32 v7, 0x42800000
184+
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
187185
; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
188-
; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
189-
; GFX8-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
190-
; GFX8-NEXT: v_add_f32_e32 v0, v0, v7
191-
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
186+
; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
187+
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
188+
; GFX8-NEXT: v_add_f32_e32 v0, v0, v8
189+
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
192190
; GFX8-NEXT: v_exp_f32_e32 v0, v0
193191
; GFX8-NEXT: v_add_f32_e32 v1, v1, v2
194192
; GFX8-NEXT: v_exp_f32_e32 v1, v1
@@ -202,14 +200,13 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
202200
; GFX9-LABEL: v_pow_v2f32:
203201
; GFX9: ; %bb.0:
204202
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205-
; GFX9-NEXT: s_mov_b32 s4, 0x800000
206-
; GFX9-NEXT: v_mov_b32_e32 v4, 0x4f800000
207-
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
208-
; GFX9-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
209-
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v5
210-
; GFX9-NEXT: v_mov_b32_e32 v5, 0x800000
211-
; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
212-
; GFX9-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
203+
; GFX9-NEXT: v_mov_b32_e32 v4, 0x800000
204+
; GFX9-NEXT: v_mov_b32_e32 v5, 0x4f800000
205+
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
206+
; GFX9-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
207+
; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
208+
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v6
209+
; GFX9-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
213210
; GFX9-NEXT: v_log_f32_e32 v0, v0
214211
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4
215212
; GFX9-NEXT: v_log_f32_e32 v1, v1
@@ -218,15 +215,15 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
218215
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v7
219216
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
220217
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
221-
; GFX9-NEXT: s_mov_b32 s6, 0xc2fc0000
218+
; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
222219
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v5
223-
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
224-
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
220+
; GFX9-NEXT: v_mov_b32_e32 v7, 0x42800000
221+
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
225222
; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
226-
; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
227-
; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
228-
; GFX9-NEXT: v_add_f32_e32 v0, v0, v7
229-
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
223+
; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
224+
; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
225+
; GFX9-NEXT: v_add_f32_e32 v0, v0, v8
226+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
230227
; GFX9-NEXT: v_exp_f32_e32 v0, v0
231228
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
232229
; GFX9-NEXT: v_exp_f32_e32 v1, v1
@@ -382,25 +379,25 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
382379
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
383380
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
384381
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
385-
; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
382+
; GFX6-NEXT: v_mov_b32_e32 v4, 0xc2fc0000
386383
; GFX6-NEXT: v_log_f32_e32 v0, v0
387-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000
384+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x42800000
388385
; GFX6-NEXT: v_log_f32_e32 v1, v1
389386
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
390-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
391-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
387+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
388+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
392389
; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
393390
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3
394391
; GFX6-NEXT: v_mov_b32_e32 v3, 0x1f800000
395-
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
392+
; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc
396393
; GFX6-NEXT: v_exp_f32_e32 v0, v0
397394
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
398-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
399-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
395+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
396+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
400397
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
401398
; GFX6-NEXT: v_exp_f32_e32 v1, v1
402399
; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
403-
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
400+
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
404401
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
405402
; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
406403
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -505,22 +502,22 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
505502
; GFX6-NEXT: v_log_f32_e32 v1, v1
506503
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
507504
; GFX6-NEXT: v_log_f32_e32 v0, v0
508-
; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
505+
; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000
509506
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
510-
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
511-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
512-
; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
513-
; GFX6-NEXT: v_add_f32_e32 v1, v1, v4
514-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
507+
; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
508+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
509+
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
510+
; GFX6-NEXT: v_add_f32_e32 v1, v1, v5
511+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
515512
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
516-
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
517-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
518-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
513+
; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
514+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
515+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
519516
; GFX6-NEXT: v_exp_f32_e32 v1, v1
520517
; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
521518
; GFX6-NEXT: v_exp_f32_e32 v2, v0
522-
; GFX6-NEXT: v_mul_f32_e32 v0, v1, v5
523-
; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
519+
; GFX6-NEXT: v_mul_f32_e32 v0, v1, v6
520+
; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v5, vcc
524521
; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
525522
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
526523
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -632,21 +629,21 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
632629
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
633630
; GFX6-NEXT: v_log_f32_e32 v1, v1
634631
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
635-
; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
636-
; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000
637-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
638-
; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
639-
; GFX6-NEXT: v_add_f32_e32 v0, v0, v4
640-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
632+
; GFX6-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
633+
; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000
634+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
635+
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
636+
; GFX6-NEXT: v_add_f32_e32 v0, v0, v5
637+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
641638
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
642-
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
643-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
644-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
639+
; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
640+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
641+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
645642
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
646643
; GFX6-NEXT: v_exp_f32_e32 v0, v0
647644
; GFX6-NEXT: v_exp_f32_e32 v1, v1
648-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
649-
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
645+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
646+
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
650647
; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
651648
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
652649
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -762,21 +759,21 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
762759
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
763760
; GFX6-NEXT: v_log_f32_e32 v0, v0
764761
; GFX6-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
765-
; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
766-
; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000
767-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
768-
; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
769-
; GFX6-NEXT: v_add_f32_e32 v2, v2, v4
770-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
762+
; GFX6-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
763+
; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000
764+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
765+
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
766+
; GFX6-NEXT: v_add_f32_e32 v2, v2, v5
767+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
771768
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
772-
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
773-
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
774-
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
769+
; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
770+
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
771+
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
775772
; GFX6-NEXT: v_exp_f32_e32 v2, v2
776773
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
777774
; GFX6-NEXT: v_exp_f32_e32 v1, v0
778-
; GFX6-NEXT: v_mul_f32_e32 v0, v2, v5
779-
; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
775+
; GFX6-NEXT: v_mul_f32_e32 v0, v2, v6
776+
; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
780777
; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
781778
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
782779
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1

0 commit comments

Comments
 (0)