Skip to content

Commit ef9f0b3

Browse files
authored
[DAGCombiner] Don't peek through truncates of shift amounts in takeInexpensiveLog2. (#126957)
Shift amounts in SelectionDAG don't have to match the result type of the shift. SelectionDAGBuilder will aggressively truncate shift amounts to the target's preferred type. This may result in a zero extend that existed in IR being removed. If we look through a truncate here, we can't guarantee the upper bits of the truncate input are zero. There may have been a zext that was removed. Unfortunately, this regresses tests where no truncate was involved. The only way I can think to fix this is to add an assertzext when SelectionDAGBuilder truncates a shift amount or remove the early truncation of shift amounts from SelectionDAGBuilder altogether. Fixes #126889.
1 parent c5def84 commit ef9f0b3

File tree

2 files changed

+139
-85
lines changed

2 files changed

+139
-85
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28446,7 +28446,11 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2844628446
return SDValue();
2844728447

2844828448
auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
28449-
ToCast = PeekThroughCastsAndTrunc(ToCast);
28449+
// Peek through zero extend. We can't peek through truncates since this
28450+
// function is called on a shift amount. We must ensure that all of the bits
28451+
// above the original shift amount are zeroed by this function.
28452+
while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
28453+
ToCast = ToCast.getOperand(0);
2845028454
EVT CurVT = ToCast.getValueType();
2845128455
if (NewVT == CurVT)
2845228456
return ToCast;

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 134 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -660,73 +660,109 @@ define <8 x half> @fdiv_pow2_8xhalf(<8 x i16> %i) {
660660
ret <8 x half> %r
661661
}
662662

663+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
664+
; in the original IR.
663665
define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
664666
; CHECK-SSE-LABEL: fmul_pow_shl_cnt:
665667
; CHECK-SSE: # %bb.0:
666-
; CHECK-SSE-NEXT: shlq $52, %rdi
667-
; CHECK-SSE-NEXT: movabsq $4621256167635550208, %rax # imm = 0x4022000000000000
668-
; CHECK-SSE-NEXT: addq %rdi, %rax
669-
; CHECK-SSE-NEXT: movq %rax, %xmm0
668+
; CHECK-SSE-NEXT: movzbl %dil, %eax
669+
; CHECK-SSE-NEXT: shlq $52, %rax
670+
; CHECK-SSE-NEXT: movabsq $4621256167635550208, %rcx # imm = 0x4022000000000000
671+
; CHECK-SSE-NEXT: addq %rax, %rcx
672+
; CHECK-SSE-NEXT: movq %rcx, %xmm0
670673
; CHECK-SSE-NEXT: retq
671674
;
672675
; CHECK-AVX-LABEL: fmul_pow_shl_cnt:
673676
; CHECK-AVX: # %bb.0:
674-
; CHECK-AVX-NEXT: shlq $52, %rdi
675-
; CHECK-AVX-NEXT: movabsq $4621256167635550208, %rax # imm = 0x4022000000000000
676-
; CHECK-AVX-NEXT: addq %rdi, %rax
677-
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
677+
; CHECK-AVX-NEXT: movzbl %dil, %eax
678+
; CHECK-AVX-NEXT: shlq $52, %rax
679+
; CHECK-AVX-NEXT: movabsq $4621256167635550208, %rcx # imm = 0x4022000000000000
680+
; CHECK-AVX-NEXT: addq %rax, %rcx
681+
; CHECK-AVX-NEXT: vmovq %rcx, %xmm0
678682
; CHECK-AVX-NEXT: retq
679683
%shl = shl nuw i64 1, %cnt
680684
%conv = uitofp i64 %shl to double
681685
%mul = fmul double 9.000000e+00, %conv
682686
ret double %mul
683687
}
684688

689+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
690+
; in the original IR.
685691
define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
686692
; CHECK-SSE-LABEL: fmul_pow_shl_cnt2:
687693
; CHECK-SSE: # %bb.0:
688-
; CHECK-SSE-NEXT: incl %edi
689-
; CHECK-SSE-NEXT: shlq $52, %rdi
690-
; CHECK-SSE-NEXT: movabsq $-4602115869219225600, %rax # imm = 0xC022000000000000
691-
; CHECK-SSE-NEXT: addq %rdi, %rax
692-
; CHECK-SSE-NEXT: movq %rax, %xmm0
694+
; CHECK-SSE-NEXT: movzbl %dil, %eax
695+
; CHECK-SSE-NEXT: incl %eax
696+
; CHECK-SSE-NEXT: shlq $52, %rax
697+
; CHECK-SSE-NEXT: movabsq $-4602115869219225600, %rcx # imm = 0xC022000000000000
698+
; CHECK-SSE-NEXT: addq %rax, %rcx
699+
; CHECK-SSE-NEXT: movq %rcx, %xmm0
693700
; CHECK-SSE-NEXT: retq
694701
;
695702
; CHECK-AVX-LABEL: fmul_pow_shl_cnt2:
696703
; CHECK-AVX: # %bb.0:
697-
; CHECK-AVX-NEXT: incl %edi
698-
; CHECK-AVX-NEXT: shlq $52, %rdi
699-
; CHECK-AVX-NEXT: movabsq $-4602115869219225600, %rax # imm = 0xC022000000000000
700-
; CHECK-AVX-NEXT: addq %rdi, %rax
701-
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
704+
; CHECK-AVX-NEXT: movzbl %dil, %eax
705+
; CHECK-AVX-NEXT: incl %eax
706+
; CHECK-AVX-NEXT: shlq $52, %rax
707+
; CHECK-AVX-NEXT: movabsq $-4602115869219225600, %rcx # imm = 0xC022000000000000
708+
; CHECK-AVX-NEXT: addq %rax, %rcx
709+
; CHECK-AVX-NEXT: vmovq %rcx, %xmm0
702710
; CHECK-AVX-NEXT: retq
703711
%shl = shl nuw i64 2, %cnt
704712
%conv = uitofp i64 %shl to double
705713
%mul = fmul double -9.000000e+00, %conv
706714
ret double %mul
707715
}
708716

717+
; Make sure we do a movzbl of the input register.
718+
define double @fmul_pow_shl_cnt3(i8 %cnt) nounwind {
719+
; CHECK-SSE-LABEL: fmul_pow_shl_cnt3:
720+
; CHECK-SSE: # %bb.0:
721+
; CHECK-SSE-NEXT: movzbl %dil, %eax
722+
; CHECK-SSE-NEXT: shlq $52, %rax
723+
; CHECK-SSE-NEXT: movabsq $-4602115869219225600, %rcx # imm = 0xC022000000000000
724+
; CHECK-SSE-NEXT: addq %rax, %rcx
725+
; CHECK-SSE-NEXT: movq %rcx, %xmm0
726+
; CHECK-SSE-NEXT: retq
727+
;
728+
; CHECK-AVX-LABEL: fmul_pow_shl_cnt3:
729+
; CHECK-AVX: # %bb.0:
730+
; CHECK-AVX-NEXT: movzbl %dil, %eax
731+
; CHECK-AVX-NEXT: shlq $52, %rax
732+
; CHECK-AVX-NEXT: movabsq $-4602115869219225600, %rcx # imm = 0xC022000000000000
733+
; CHECK-AVX-NEXT: addq %rax, %rcx
734+
; CHECK-AVX-NEXT: vmovq %rcx, %xmm0
735+
; CHECK-AVX-NEXT: retq
736+
%zext_cnt = zext i8 %cnt to i64
737+
%shl = shl nuw i64 1, %zext_cnt
738+
%conv = uitofp i64 %shl to double
739+
%mul = fmul double -9.000000e+00, %conv
740+
ret double %mul
741+
}
742+
743+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
744+
; in the original IR.
709745
define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
710746
; CHECK-SSE-LABEL: fmul_pow_select:
711747
; CHECK-SSE: # %bb.0:
712-
; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi
713-
; CHECK-SSE-NEXT: leal 1(%rdi), %eax
748+
; CHECK-SSE-NEXT: movzbl %dil, %eax
749+
; CHECK-SSE-NEXT: leal 1(%rax), %ecx
714750
; CHECK-SSE-NEXT: testb $1, %sil
715-
; CHECK-SSE-NEXT: cmovnel %edi, %eax
716-
; CHECK-SSE-NEXT: shll $23, %eax
717-
; CHECK-SSE-NEXT: addl $1091567616, %eax # imm = 0x41100000
718-
; CHECK-SSE-NEXT: movd %eax, %xmm0
751+
; CHECK-SSE-NEXT: cmovnel %eax, %ecx
752+
; CHECK-SSE-NEXT: shll $23, %ecx
753+
; CHECK-SSE-NEXT: addl $1091567616, %ecx # imm = 0x41100000
754+
; CHECK-SSE-NEXT: movd %ecx, %xmm0
719755
; CHECK-SSE-NEXT: retq
720756
;
721757
; CHECK-AVX-LABEL: fmul_pow_select:
722758
; CHECK-AVX: # %bb.0:
723-
; CHECK-AVX-NEXT: # kill: def $edi killed $edi def $rdi
724-
; CHECK-AVX-NEXT: leal 1(%rdi), %eax
759+
; CHECK-AVX-NEXT: movzbl %dil, %eax
760+
; CHECK-AVX-NEXT: leal 1(%rax), %ecx
725761
; CHECK-AVX-NEXT: testb $1, %sil
726-
; CHECK-AVX-NEXT: cmovnel %edi, %eax
727-
; CHECK-AVX-NEXT: shll $23, %eax
728-
; CHECK-AVX-NEXT: addl $1091567616, %eax # imm = 0x41100000
729-
; CHECK-AVX-NEXT: vmovd %eax, %xmm0
762+
; CHECK-AVX-NEXT: cmovnel %eax, %ecx
763+
; CHECK-AVX-NEXT: shll $23, %ecx
764+
; CHECK-AVX-NEXT: addl $1091567616, %ecx # imm = 0x41100000
765+
; CHECK-AVX-NEXT: vmovd %ecx, %xmm0
730766
; CHECK-AVX-NEXT: retq
731767
%shl2 = shl nuw i32 2, %cnt
732768
%shl1 = shl nuw i32 1, %cnt
@@ -736,27 +772,31 @@ define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
736772
ret float %mul
737773
}
738774

775+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
776+
; in the original IR.
739777
define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
740778
; CHECK-SSE-LABEL: fmul_fly_pow_mul_min_pow2:
741779
; CHECK-SSE: # %bb.0:
742-
; CHECK-SSE-NEXT: addl $3, %edi
743-
; CHECK-SSE-NEXT: cmpl $13, %edi
744-
; CHECK-SSE-NEXT: movl $13, %eax
745-
; CHECK-SSE-NEXT: cmovbl %edi, %eax
746-
; CHECK-SSE-NEXT: shll $23, %eax
747-
; CHECK-SSE-NEXT: addl $1091567616, %eax # imm = 0x41100000
748-
; CHECK-SSE-NEXT: movd %eax, %xmm0
780+
; CHECK-SSE-NEXT: movzbl %dil, %eax
781+
; CHECK-SSE-NEXT: addl $3, %eax
782+
; CHECK-SSE-NEXT: cmpl $13, %eax
783+
; CHECK-SSE-NEXT: movl $13, %ecx
784+
; CHECK-SSE-NEXT: cmovbl %eax, %ecx
785+
; CHECK-SSE-NEXT: shll $23, %ecx
786+
; CHECK-SSE-NEXT: addl $1091567616, %ecx # imm = 0x41100000
787+
; CHECK-SSE-NEXT: movd %ecx, %xmm0
749788
; CHECK-SSE-NEXT: retq
750789
;
751790
; CHECK-AVX-LABEL: fmul_fly_pow_mul_min_pow2:
752791
; CHECK-AVX: # %bb.0:
753-
; CHECK-AVX-NEXT: addl $3, %edi
754-
; CHECK-AVX-NEXT: cmpl $13, %edi
755-
; CHECK-AVX-NEXT: movl $13, %eax
756-
; CHECK-AVX-NEXT: cmovbl %edi, %eax
757-
; CHECK-AVX-NEXT: shll $23, %eax
758-
; CHECK-AVX-NEXT: addl $1091567616, %eax # imm = 0x41100000
759-
; CHECK-AVX-NEXT: vmovd %eax, %xmm0
792+
; CHECK-AVX-NEXT: movzbl %dil, %eax
793+
; CHECK-AVX-NEXT: addl $3, %eax
794+
; CHECK-AVX-NEXT: cmpl $13, %eax
795+
; CHECK-AVX-NEXT: movl $13, %ecx
796+
; CHECK-AVX-NEXT: cmovbl %eax, %ecx
797+
; CHECK-AVX-NEXT: shll $23, %ecx
798+
; CHECK-AVX-NEXT: addl $1091567616, %ecx # imm = 0x41100000
799+
; CHECK-AVX-NEXT: vmovd %ecx, %xmm0
760800
; CHECK-AVX-NEXT: retq
761801
%shl8 = shl nuw i64 8, %cnt
762802
%shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
@@ -765,28 +805,30 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
765805
ret float %mul
766806
}
767807

808+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
809+
; in the original IR.
768810
define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
769811
; CHECK-SSE-LABEL: fmul_pow_mul_max_pow2:
770812
; CHECK-SSE: # %bb.0:
771-
; CHECK-SSE-NEXT: movl %edi, %eax
813+
; CHECK-SSE-NEXT: movzbl %dil, %eax
772814
; CHECK-SSE-NEXT: leaq 1(%rax), %rcx
773815
; CHECK-SSE-NEXT: cmpq %rcx, %rax
774816
; CHECK-SSE-NEXT: cmovaq %rax, %rcx
775817
; CHECK-SSE-NEXT: shlq $52, %rcx
776818
; CHECK-SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
777-
; CHECK-SSE-NEXT: addq %rcx, %rax
819+
; CHECK-SSE-NEXT: orq %rcx, %rax
778820
; CHECK-SSE-NEXT: movq %rax, %xmm0
779821
; CHECK-SSE-NEXT: retq
780822
;
781823
; CHECK-AVX-LABEL: fmul_pow_mul_max_pow2:
782824
; CHECK-AVX: # %bb.0:
783-
; CHECK-AVX-NEXT: movl %edi, %eax
825+
; CHECK-AVX-NEXT: movzbl %dil, %eax
784826
; CHECK-AVX-NEXT: leaq 1(%rax), %rcx
785827
; CHECK-AVX-NEXT: cmpq %rcx, %rax
786828
; CHECK-AVX-NEXT: cmovaq %rax, %rcx
787829
; CHECK-AVX-NEXT: shlq $52, %rcx
788830
; CHECK-AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
789-
; CHECK-AVX-NEXT: addq %rcx, %rax
831+
; CHECK-AVX-NEXT: orq %rcx, %rax
790832
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
791833
; CHECK-AVX-NEXT: retq
792834
%shl2 = shl nuw i16 2, %cnt
@@ -1161,23 +1203,25 @@ define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
11611203
ret double %mul
11621204
}
11631205

1206+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
1207+
; in the original IR.
11641208
define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
11651209
; CHECK-SSE-LABEL: fmul_pow_shl_cnt_safe:
11661210
; CHECK-SSE: # %bb.0:
1167-
; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi
1168-
; CHECK-SSE-NEXT: shlq $52, %rdi
1169-
; CHECK-SSE-NEXT: movabsq $8930638061065157010, %rax # imm = 0x7BEFFFFFFF5F3992
1170-
; CHECK-SSE-NEXT: addq %rdi, %rax
1171-
; CHECK-SSE-NEXT: movq %rax, %xmm0
1211+
; CHECK-SSE-NEXT: movzbl %dil, %eax
1212+
; CHECK-SSE-NEXT: shlq $52, %rax
1213+
; CHECK-SSE-NEXT: movabsq $8930638061065157010, %rcx # imm = 0x7BEFFFFFFF5F3992
1214+
; CHECK-SSE-NEXT: addq %rax, %rcx
1215+
; CHECK-SSE-NEXT: movq %rcx, %xmm0
11721216
; CHECK-SSE-NEXT: retq
11731217
;
11741218
; CHECK-AVX-LABEL: fmul_pow_shl_cnt_safe:
11751219
; CHECK-AVX: # %bb.0:
1176-
; CHECK-AVX-NEXT: # kill: def $edi killed $edi def $rdi
1177-
; CHECK-AVX-NEXT: shlq $52, %rdi
1178-
; CHECK-AVX-NEXT: movabsq $8930638061065157010, %rax # imm = 0x7BEFFFFFFF5F3992
1179-
; CHECK-AVX-NEXT: addq %rdi, %rax
1180-
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
1220+
; CHECK-AVX-NEXT: movzbl %dil, %eax
1221+
; CHECK-AVX-NEXT: shlq $52, %rax
1222+
; CHECK-AVX-NEXT: movabsq $8930638061065157010, %rcx # imm = 0x7BEFFFFFFF5F3992
1223+
; CHECK-AVX-NEXT: addq %rax, %rcx
1224+
; CHECK-AVX-NEXT: vmovq %rcx, %xmm0
11811225
; CHECK-AVX-NEXT: retq
11821226
%shl = shl nuw i16 1, %cnt
11831227
%conv = uitofp i16 %shl to double
@@ -1236,15 +1280,15 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
12361280
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
12371281
; CHECK-SSE-NEXT: shlq %cl, %rax
12381282
; CHECK-SSE-NEXT: testq %rax, %rax
1239-
; CHECK-SSE-NEXT: js .LBB22_1
1283+
; CHECK-SSE-NEXT: js .LBB23_1
12401284
; CHECK-SSE-NEXT: # %bb.2:
12411285
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
1242-
; CHECK-SSE-NEXT: jmp .LBB22_3
1243-
; CHECK-SSE-NEXT: .LBB22_1:
1286+
; CHECK-SSE-NEXT: jmp .LBB23_3
1287+
; CHECK-SSE-NEXT: .LBB23_1:
12441288
; CHECK-SSE-NEXT: shrq %rax
12451289
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
12461290
; CHECK-SSE-NEXT: addss %xmm1, %xmm1
1247-
; CHECK-SSE-NEXT: .LBB22_3:
1291+
; CHECK-SSE-NEXT: .LBB23_3:
12481292
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
12491293
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
12501294
; CHECK-SSE-NEXT: retq
@@ -1256,15 +1300,15 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
12561300
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
12571301
; CHECK-AVX2-NEXT: shlq %cl, %rax
12581302
; CHECK-AVX2-NEXT: testq %rax, %rax
1259-
; CHECK-AVX2-NEXT: js .LBB22_1
1303+
; CHECK-AVX2-NEXT: js .LBB23_1
12601304
; CHECK-AVX2-NEXT: # %bb.2:
12611305
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
1262-
; CHECK-AVX2-NEXT: jmp .LBB22_3
1263-
; CHECK-AVX2-NEXT: .LBB22_1:
1306+
; CHECK-AVX2-NEXT: jmp .LBB23_3
1307+
; CHECK-AVX2-NEXT: .LBB23_1:
12641308
; CHECK-AVX2-NEXT: shrq %rax
12651309
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
12661310
; CHECK-AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
1267-
; CHECK-AVX2-NEXT: .LBB22_3:
1311+
; CHECK-AVX2-NEXT: .LBB23_3:
12681312
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
12691313
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
12701314
; CHECK-AVX2-NEXT: retq
@@ -1545,23 +1589,25 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
15451589
ret half %mul
15461590
}
15471591

1592+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
1593+
; in the original IR.
15481594
define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
15491595
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
15501596
; CHECK-SSE: # %bb.0:
1551-
; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi
1552-
; CHECK-SSE-NEXT: shlq $52, %rdi
1553-
; CHECK-SSE-NEXT: movabsq $3936146074321813504, %rax # imm = 0x36A0000000000000
1554-
; CHECK-SSE-NEXT: subq %rdi, %rax
1555-
; CHECK-SSE-NEXT: movq %rax, %xmm0
1597+
; CHECK-SSE-NEXT: movzbl %dil, %eax
1598+
; CHECK-SSE-NEXT: shlq $52, %rax
1599+
; CHECK-SSE-NEXT: movabsq $3936146074321813504, %rcx # imm = 0x36A0000000000000
1600+
; CHECK-SSE-NEXT: subq %rax, %rcx
1601+
; CHECK-SSE-NEXT: movq %rcx, %xmm0
15561602
; CHECK-SSE-NEXT: retq
15571603
;
15581604
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
15591605
; CHECK-AVX: # %bb.0:
1560-
; CHECK-AVX-NEXT: # kill: def $edi killed $edi def $rdi
1561-
; CHECK-AVX-NEXT: shlq $52, %rdi
1562-
; CHECK-AVX-NEXT: movabsq $3936146074321813504, %rax # imm = 0x36A0000000000000
1563-
; CHECK-AVX-NEXT: subq %rdi, %rax
1564-
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
1606+
; CHECK-AVX-NEXT: movzbl %dil, %eax
1607+
; CHECK-AVX-NEXT: shlq $52, %rax
1608+
; CHECK-AVX-NEXT: movabsq $3936146074321813504, %rcx # imm = 0x36A0000000000000
1609+
; CHECK-AVX-NEXT: subq %rax, %rcx
1610+
; CHECK-AVX-NEXT: vmovq %rcx, %xmm0
15651611
; CHECK-AVX-NEXT: retq
15661612
%shl = shl nuw i32 1, %cnt
15671613
%conv = uitofp i32 %shl to double
@@ -1617,21 +1663,25 @@ define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
16171663
ret float %mul
16181664
}
16191665

1666+
; FIXME: The movzbl is unnecessary. It would be UB for the upper bits to be set
1667+
; in the original IR.
16201668
define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
16211669
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt32_okay:
16221670
; CHECK-SSE: # %bb.0:
1623-
; CHECK-SSE-NEXT: shll $23, %edi
1624-
; CHECK-SSE-NEXT: movl $285212672, %eax # imm = 0x11000000
1625-
; CHECK-SSE-NEXT: subl %edi, %eax
1626-
; CHECK-SSE-NEXT: movd %eax, %xmm0
1671+
; CHECK-SSE-NEXT: movzbl %dil, %eax
1672+
; CHECK-SSE-NEXT: shll $23, %eax
1673+
; CHECK-SSE-NEXT: movl $285212672, %ecx # imm = 0x11000000
1674+
; CHECK-SSE-NEXT: subl %eax, %ecx
1675+
; CHECK-SSE-NEXT: movd %ecx, %xmm0
16271676
; CHECK-SSE-NEXT: retq
16281677
;
16291678
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt32_okay:
16301679
; CHECK-AVX: # %bb.0:
1631-
; CHECK-AVX-NEXT: shll $23, %edi
1632-
; CHECK-AVX-NEXT: movl $285212672, %eax # imm = 0x11000000
1633-
; CHECK-AVX-NEXT: subl %edi, %eax
1634-
; CHECK-AVX-NEXT: vmovd %eax, %xmm0
1680+
; CHECK-AVX-NEXT: movzbl %dil, %eax
1681+
; CHECK-AVX-NEXT: shll $23, %eax
1682+
; CHECK-AVX-NEXT: movl $285212672, %ecx # imm = 0x11000000
1683+
; CHECK-AVX-NEXT: subl %eax, %ecx
1684+
; CHECK-AVX-NEXT: vmovd %ecx, %xmm0
16351685
; CHECK-AVX-NEXT: retq
16361686
%shl = shl nuw i32 1, %cnt
16371687
%conv = uitofp i32 %shl to float

0 commit comments

Comments
 (0)