Skip to content

Commit 347bc63

Browse files
committed
[DAG] fdiv X, c2 -> fmul X, 1/c2
This moves the combine of fdiv by constant to fmul out of an 'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal())' block, so that it still triggers when the reciprocal of the constant is exact. An extra check for Recip.isDenormal() is added, as multiple places refer to denormal reciprocals being unsafe or slow on certain platforms.
1 parent 6e975ec commit 347bc63

File tree

6 files changed

+134
-112
lines changed

6 files changed

+134
-112
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17258,26 +17258,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1725817258
if (SDValue V = combineRepeatedFPDivisors(N))
1725917259
return V;
1726017260

17261-
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17262-
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
17263-
if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17264-
// Compute the reciprocal 1.0 / c2.
17265-
const APFloat &N1APF = N1CFP->getValueAPF();
17266-
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17267-
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17268-
// Only do the transform if the reciprocal is a legal fp immediate that
17269-
// isn't too nasty (eg NaN, denormal, ...).
17270-
if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17271-
(!LegalOperations ||
17272-
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17273-
// backend)... we should handle this gracefully after Legalize.
17274-
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17275-
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17276-
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17277-
return DAG.getNode(ISD::FMUL, DL, VT, N0,
17278-
DAG.getConstantFP(Recip, DL, VT));
17279-
}
17261+
// fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17262+
// the loss is acceptable with AllowReciprocal.
17263+
if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17264+
// Compute the reciprocal 1.0 / c2.
17265+
const APFloat &N1APF = N1CFP->getValueAPF();
17266+
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17267+
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17268+
// Only do the transform if the reciprocal is a legal fp immediate that
17269+
// isn't too nasty (eg NaN, denormal, ...).
17270+
if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17271+
(st == APFloat::opInexact &&
17272+
(Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17273+
(!LegalOperations ||
17274+
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17275+
// backend)... we should handle this gracefully after Legalize.
17276+
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17277+
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17278+
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17279+
return DAG.getNode(ISD::FMUL, DL, VT, N0,
17280+
DAG.getConstantFP(Recip, DL, VT));
17281+
}
1728017282

17283+
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
1728117284
// If this FDIV is part of a reciprocal square root, it may be folded
1728217285
// into a target-specific square root estimate instruction.
1728317286
if (N1.getOpcode() == ISD::FSQRT) {

llvm/test/CodeGen/AArch64/fcvt-fixed.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
412412
; CHECK-NO16-LABEL: scvtf_f16_i32_7:
413413
; CHECK-NO16: // %bb.0:
414414
; CHECK-NO16-NEXT: scvtf s1, w0
415-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
415+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
416416
; CHECK-NO16-NEXT: fcvt h1, s1
417417
; CHECK-NO16-NEXT: fcvt s1, h1
418-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
418+
; CHECK-NO16-NEXT: fmul s0, s1, s0
419419
; CHECK-NO16-NEXT: fcvt h0, s0
420420
; CHECK-NO16-NEXT: ret
421421
;
@@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
432432
; CHECK-NO16-LABEL: scvtf_f16_i32_15:
433433
; CHECK-NO16: // %bb.0:
434434
; CHECK-NO16-NEXT: scvtf s1, w0
435-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
435+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
436436
; CHECK-NO16-NEXT: fcvt h1, s1
437437
; CHECK-NO16-NEXT: fcvt s1, h1
438-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
438+
; CHECK-NO16-NEXT: fmul s0, s1, s0
439439
; CHECK-NO16-NEXT: fcvt h0, s0
440440
; CHECK-NO16-NEXT: ret
441441
;
@@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
452452
; CHECK-NO16-LABEL: scvtf_f16_i64_7:
453453
; CHECK-NO16: // %bb.0:
454454
; CHECK-NO16-NEXT: scvtf s1, x0
455-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
455+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
456456
; CHECK-NO16-NEXT: fcvt h1, s1
457457
; CHECK-NO16-NEXT: fcvt s1, h1
458-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
458+
; CHECK-NO16-NEXT: fmul s0, s1, s0
459459
; CHECK-NO16-NEXT: fcvt h0, s0
460460
; CHECK-NO16-NEXT: ret
461461
;
@@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
472472
; CHECK-NO16-LABEL: scvtf_f16_i64_15:
473473
; CHECK-NO16: // %bb.0:
474474
; CHECK-NO16-NEXT: scvtf s1, x0
475-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
475+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
476476
; CHECK-NO16-NEXT: fcvt h1, s1
477477
; CHECK-NO16-NEXT: fcvt s1, h1
478-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
478+
; CHECK-NO16-NEXT: fmul s0, s1, s0
479479
; CHECK-NO16-NEXT: fcvt h0, s0
480480
; CHECK-NO16-NEXT: ret
481481
;
@@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
574574
; CHECK-NO16-LABEL: ucvtf_f16_i32_7:
575575
; CHECK-NO16: // %bb.0:
576576
; CHECK-NO16-NEXT: ucvtf s1, w0
577-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
577+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
578578
; CHECK-NO16-NEXT: fcvt h1, s1
579579
; CHECK-NO16-NEXT: fcvt s1, h1
580-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
580+
; CHECK-NO16-NEXT: fmul s0, s1, s0
581581
; CHECK-NO16-NEXT: fcvt h0, s0
582582
; CHECK-NO16-NEXT: ret
583583
;
@@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
594594
; CHECK-NO16-LABEL: ucvtf_f16_i32_15:
595595
; CHECK-NO16: // %bb.0:
596596
; CHECK-NO16-NEXT: ucvtf s1, w0
597-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
597+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
598598
; CHECK-NO16-NEXT: fcvt h1, s1
599599
; CHECK-NO16-NEXT: fcvt s1, h1
600-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
600+
; CHECK-NO16-NEXT: fmul s0, s1, s0
601601
; CHECK-NO16-NEXT: fcvt h0, s0
602602
; CHECK-NO16-NEXT: ret
603603
;
@@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
614614
; CHECK-NO16-LABEL: ucvtf_f16_i64_7:
615615
; CHECK-NO16: // %bb.0:
616616
; CHECK-NO16-NEXT: ucvtf s1, x0
617-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
617+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
618618
; CHECK-NO16-NEXT: fcvt h1, s1
619619
; CHECK-NO16-NEXT: fcvt s1, h1
620-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
620+
; CHECK-NO16-NEXT: fmul s0, s1, s0
621621
; CHECK-NO16-NEXT: fcvt h0, s0
622622
; CHECK-NO16-NEXT: ret
623623
;
@@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
634634
; CHECK-NO16-LABEL: ucvtf_f16_i64_15:
635635
; CHECK-NO16: // %bb.0:
636636
; CHECK-NO16-NEXT: ucvtf s1, x0
637-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
637+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
638638
; CHECK-NO16-NEXT: fcvt h1, s1
639639
; CHECK-NO16-NEXT: fcvt s1, h1
640-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
640+
; CHECK-NO16-NEXT: fmul s0, s1, s0
641641
; CHECK-NO16-NEXT: fcvt h0, s0
642642
; CHECK-NO16-NEXT: ret
643643
;

llvm/test/CodeGen/AArch64/frem-power2.ll

Lines changed: 61 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
define float @frem2(float %x) {
66
; CHECK-SD-LABEL: frem2:
77
; CHECK-SD: // %bb.0: // %entry
8-
; CHECK-SD-NEXT: fmov s1, #2.00000000
8+
; CHECK-SD-NEXT: fmov s1, #0.50000000
99
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
10-
; CHECK-SD-NEXT: fdiv s2, s0, s1
11-
; CHECK-SD-NEXT: frintz s2, s2
12-
; CHECK-SD-NEXT: fmsub s1, s2, s1, s0
10+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
11+
; CHECK-SD-NEXT: fmul s1, s0, s1
12+
; CHECK-SD-NEXT: frintz s1, s1
13+
; CHECK-SD-NEXT: fmadd s1, s1, s2, s0
1314
; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24
1415
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
1516
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
@@ -27,10 +28,11 @@ entry:
2728
define float @frem2_nsz(float %x) {
2829
; CHECK-SD-LABEL: frem2_nsz:
2930
; CHECK-SD: // %bb.0: // %entry
30-
; CHECK-SD-NEXT: fmov s1, #2.00000000
31-
; CHECK-SD-NEXT: fdiv s2, s0, s1
32-
; CHECK-SD-NEXT: frintz s2, s2
33-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
31+
; CHECK-SD-NEXT: fmov s1, #0.50000000
32+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
33+
; CHECK-SD-NEXT: fmul s1, s0, s1
34+
; CHECK-SD-NEXT: frintz s1, s1
35+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
3436
; CHECK-SD-NEXT: ret
3537
;
3638
; CHECK-GI-LABEL: frem2_nsz:
@@ -65,10 +67,11 @@ define float @frem2_abs(float %x) {
6567
; CHECK-SD-LABEL: frem2_abs:
6668
; CHECK-SD: // %bb.0: // %entry
6769
; CHECK-SD-NEXT: fabs s0, s0
68-
; CHECK-SD-NEXT: fmov s1, #2.00000000
69-
; CHECK-SD-NEXT: fdiv s2, s0, s1
70-
; CHECK-SD-NEXT: frintz s2, s2
71-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
70+
; CHECK-SD-NEXT: fmov s1, #0.50000000
71+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
72+
; CHECK-SD-NEXT: fmul s1, s0, s1
73+
; CHECK-SD-NEXT: frintz s1, s1
74+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
7275
; CHECK-SD-NEXT: ret
7376
;
7477
; CHECK-GI-LABEL: frem2_abs:
@@ -85,9 +88,9 @@ entry:
8588
define half @hrem2_nsz(half %x) {
8689
; CHECK-SD-LABEL: hrem2_nsz:
8790
; CHECK-SD: // %bb.0: // %entry
88-
; CHECK-SD-NEXT: fmov h1, #2.00000000
91+
; CHECK-SD-NEXT: fmov h1, #0.50000000
8992
; CHECK-SD-NEXT: fmov h2, #-2.00000000
90-
; CHECK-SD-NEXT: fdiv h1, h0, h1
93+
; CHECK-SD-NEXT: fmul h1, h0, h1
9194
; CHECK-SD-NEXT: frintz h1, h1
9295
; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
9396
; CHECK-SD-NEXT: ret
@@ -112,10 +115,11 @@ entry:
112115
define double @drem2_nsz(double %x) {
113116
; CHECK-SD-LABEL: drem2_nsz:
114117
; CHECK-SD: // %bb.0: // %entry
115-
; CHECK-SD-NEXT: fmov d1, #2.00000000
116-
; CHECK-SD-NEXT: fdiv d2, d0, d1
117-
; CHECK-SD-NEXT: frintz d2, d2
118-
; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
118+
; CHECK-SD-NEXT: fmov d1, #0.50000000
119+
; CHECK-SD-NEXT: fmov d2, #-2.00000000
120+
; CHECK-SD-NEXT: fmul d1, d0, d1
121+
; CHECK-SD-NEXT: frintz d1, d1
122+
; CHECK-SD-NEXT: fmadd d0, d1, d2, d0
119123
; CHECK-SD-NEXT: ret
120124
;
121125
; CHECK-GI-LABEL: drem2_nsz:
@@ -176,10 +180,11 @@ entry:
176180
define float @fremm2_nsz(float %x) {
177181
; CHECK-SD-LABEL: fremm2_nsz:
178182
; CHECK-SD: // %bb.0: // %entry
179-
; CHECK-SD-NEXT: fmov s1, #-2.00000000
180-
; CHECK-SD-NEXT: fdiv s2, s0, s1
181-
; CHECK-SD-NEXT: frintz s2, s2
182-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
183+
; CHECK-SD-NEXT: fmov s1, #-0.50000000
184+
; CHECK-SD-NEXT: fmov s2, #2.00000000
185+
; CHECK-SD-NEXT: fmul s1, s0, s1
186+
; CHECK-SD-NEXT: frintz s1, s1
187+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
183188
; CHECK-SD-NEXT: ret
184189
;
185190
; CHECK-GI-LABEL: fremm2_nsz:
@@ -195,10 +200,11 @@ define float @frem4_abs(float %x) {
195200
; CHECK-SD-LABEL: frem4_abs:
196201
; CHECK-SD: // %bb.0: // %entry
197202
; CHECK-SD-NEXT: fabs s0, s0
198-
; CHECK-SD-NEXT: fmov s1, #4.00000000
199-
; CHECK-SD-NEXT: fdiv s2, s0, s1
200-
; CHECK-SD-NEXT: frintz s2, s2
201-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
203+
; CHECK-SD-NEXT: fmov s1, #0.25000000
204+
; CHECK-SD-NEXT: fmov s2, #-4.00000000
205+
; CHECK-SD-NEXT: fmul s1, s0, s1
206+
; CHECK-SD-NEXT: frintz s1, s1
207+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
202208
; CHECK-SD-NEXT: ret
203209
;
204210
; CHECK-GI-LABEL: frem4_abs:
@@ -216,10 +222,12 @@ define float @frem16_abs(float %x) {
216222
; CHECK-SD-LABEL: frem16_abs:
217223
; CHECK-SD: // %bb.0: // %entry
218224
; CHECK-SD-NEXT: fabs s0, s0
219-
; CHECK-SD-NEXT: fmov s1, #16.00000000
220-
; CHECK-SD-NEXT: fdiv s2, s0, s1
221-
; CHECK-SD-NEXT: frintz s2, s2
222-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
225+
; CHECK-SD-NEXT: mov w8, #1031798784 // =0x3d800000
226+
; CHECK-SD-NEXT: fmov s2, #-16.00000000
227+
; CHECK-SD-NEXT: fmov s1, w8
228+
; CHECK-SD-NEXT: fmul s1, s0, s1
229+
; CHECK-SD-NEXT: frintz s1, s1
230+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
223231
; CHECK-SD-NEXT: ret
224232
;
225233
; CHECK-GI-LABEL: frem16_abs:
@@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) {
237245
; CHECK-SD-LABEL: frem4294967296_abs:
238246
; CHECK-SD: // %bb.0: // %entry
239247
; CHECK-SD-NEXT: fabs s0, s0
240-
; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
248+
; CHECK-SD-NEXT: mov w8, #796917760 // =0x2f800000
241249
; CHECK-SD-NEXT: fmov s1, w8
242-
; CHECK-SD-NEXT: fdiv s2, s0, s1
243-
; CHECK-SD-NEXT: frintz s2, s2
244-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
250+
; CHECK-SD-NEXT: mov w8, #-813694976 // =0xcf800000
251+
; CHECK-SD-NEXT: fmov s2, w8
252+
; CHECK-SD-NEXT: fmul s1, s0, s1
253+
; CHECK-SD-NEXT: frintz s1, s1
254+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
245255
; CHECK-SD-NEXT: ret
246256
;
247257
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) {
260270
; CHECK-SD-LABEL: frem1152921504606846976_abs:
261271
; CHECK-SD: // %bb.0: // %entry
262272
; CHECK-SD-NEXT: fabs s0, s0
263-
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
273+
; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000
264274
; CHECK-SD-NEXT: fmov s1, w8
265-
; CHECK-SD-NEXT: fdiv s2, s0, s1
266-
; CHECK-SD-NEXT: frintz s2, s2
267-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
275+
; CHECK-SD-NEXT: mov w8, #-578813952 // =0xdd800000
276+
; CHECK-SD-NEXT: fmov s2, w8
277+
; CHECK-SD-NEXT: fmul s1, s0, s1
278+
; CHECK-SD-NEXT: frintz s1, s1
279+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
268280
; CHECK-SD-NEXT: ret
269281
;
270282
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) {
283295
; CHECK-SD-LABEL: frem4611686018427387904_abs:
284296
; CHECK-SD: // %bb.0: // %entry
285297
; CHECK-SD-NEXT: fabs s0, s0
286-
; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
298+
; CHECK-SD-NEXT: mov w8, #545259520 // =0x20800000
287299
; CHECK-SD-NEXT: fmov s1, w8
288-
; CHECK-SD-NEXT: fdiv s2, s0, s1
289-
; CHECK-SD-NEXT: frintz s2, s2
290-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
300+
; CHECK-SD-NEXT: mov w8, #-562036736 // =0xde800000
301+
; CHECK-SD-NEXT: fmov s2, w8
302+
; CHECK-SD-NEXT: fmul s1, s0, s1
303+
; CHECK-SD-NEXT: frintz s1, s1
304+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
291305
; CHECK-SD-NEXT: ret
292306
;
293307
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -305,11 +319,12 @@ entry:
305319
define float @frem9223372036854775808_abs(float %x) {
306320
; CHECK-SD-LABEL: frem9223372036854775808_abs:
307321
; CHECK-SD: // %bb.0: // %entry
308-
; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
322+
; CHECK-SD-NEXT: movi v1.2s, #32, lsl #24
309323
; CHECK-SD-NEXT: fabs s0, s0
310-
; CHECK-SD-NEXT: fdiv s2, s0, s1
311-
; CHECK-SD-NEXT: frintz s2, s2
312-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
324+
; CHECK-SD-NEXT: movi v2.2s, #223, lsl #24
325+
; CHECK-SD-NEXT: fmul s1, s0, s1
326+
; CHECK-SD-NEXT: frintz s1, s1
327+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
313328
; CHECK-SD-NEXT: ret
314329
;
315330
; CHECK-GI-LABEL: frem9223372036854775808_abs:

llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x,
1717
; GCN-NEXT: s_mov_b32 exec_lo, s2
1818
; GCN-NEXT: s_cbranch_execz .LBB0_2
1919
; GCN-NEXT: ; %bb.1: ; %if
20-
; GCN-NEXT: s_mov_b32 s2, 2.0
20+
; GCN-NEXT: s_mov_b32 s2, 0x40400000
2121
; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0
2222
; GCN-NEXT: v_rcp_f32_e64 v2, v1
2323
; GCN-NEXT: s_mov_b32 s3, 1.0
@@ -39,7 +39,7 @@ entry:
3939
br i1 %cc, label %if, label %end
4040

4141
if:
42-
%v.if = fdiv float %v, 2.0
42+
%v.if = fdiv float %v, 3.0
4343
br label %end
4444

4545
end:

0 commit comments

Comments
 (0)