Skip to content

Commit 2c5a688

Browse files
authored
Fix non-splat vector SREM expansion when one of the divisors is a power of two. (#82706)
The expansion previously used, derived from Hacker's Delight, does not work correctly when the dividend is INT_MIN and the divisor is a power of two. We now use an alternate derivation of the A and Q constants specifically for the power-of-two divisor case to avoid this problem. Credit to Fabian Giesen for the new derivation. Fixes #77169
1 parent 8eb6757 commit 2c5a688

File tree

2 files changed

+70
-53
lines changed

2 files changed

+70
-53
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6803,6 +6803,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
68036803
SDValue CompTargetNode, ISD::CondCode Cond,
68046804
DAGCombinerInfo &DCI, const SDLoc &DL,
68056805
SmallVectorImpl<SDNode *> &Created) const {
6806+
// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
68066807
// Fold:
68076808
// (seteq/ne (srem N, D), 0)
68086809
// To:
@@ -6813,6 +6814,17 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
68136814
// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
68146815
// - Q = floor((2 * A) / (2^K))
68156816
// where W is the width of the common type of N and D.
6817+
//
6818+
// When D is a power of two (and thus D0 is 1), the normal
6819+
// formulas for A and Q don't apply, because the derivation
6820+
// depends on D not dividing 2^(W-1), and thus theorem ZRS
6821+
// does not apply. This specifically fails when N = INT_MIN.
6822+
//
6823+
// Instead, for power-of-two D, we use:
6824+
// - A = 2^(W-1)
6825+
// |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0, 2^W - 1]
6826+
// - Q = 2^(W-K) - 1
6827+
// |-> Test that the top K bits are zero after rotation
68166828
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
68176829
"Only applicable for (in)equality comparisons.");
68186830

@@ -6896,6 +6908,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
68966908
// Q = floor((2 * A) / (2^K))
68976909
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
68986910

6911+
// If D was a power of two, apply the alternate constant derivation.
6912+
if (D0.isOne()) {
6913+
// A = 2^(W-1)
6914+
A = APInt::getSignedMinValue(W);
6915+
// Q = 2^(W-K) - 1
6916+
Q = APInt::getAllOnes(W - K).zext(W);
6917+
}
6918+
68996919
assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
69006920
"We are expecting that A is always less than all-ones for SVT");
69016921
assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&

llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll

Lines changed: 50 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
560560
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
561561
; CHECK-SSE41-NEXT: psrlq $32, %xmm1
562562
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
563-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,268435454,858993458]
563+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,268435455,858993458]
564564
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
565565
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
566566
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -646,7 +646,7 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
646646
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
647647
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
648648
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
649-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,268435454,306783378]
649+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,268435455,306783378]
650650
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
651651
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
652652
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -735,7 +735,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
735735
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
736736
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
737737
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
738-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,268435454,42949672]
738+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,268435455,42949672]
739739
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
740740
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
741741
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -1041,7 +1041,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
10411041
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2
10421042
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10431043
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1044-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,0,858993458]
1044+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,1,858993458]
10451045
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
10461046
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
10471047
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
@@ -1135,39 +1135,38 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
11351135
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
11361136
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3067833783,3067833783,1,3067833783]
11371137
; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
1138-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1139-
; CHECK-SSE41-NEXT: paddd %xmm3, %xmm2
1140-
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1141-
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
1138+
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1139+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1140+
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
11421141
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1143-
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
1144-
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
1145-
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,2,2]
1146-
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
1147-
; CHECK-SSE41-NEXT: por %xmm5, %xmm4
1148-
; CHECK-SSE41-NEXT: pminud %xmm4, %xmm3
1149-
; CHECK-SSE41-NEXT: pcmpeqd %xmm4, %xmm3
1142+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1143+
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1144+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1145+
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1146+
; CHECK-SSE41-NEXT: por %xmm4, %xmm3
1147+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [306783378,306783378,1,306783378]
1148+
; CHECK-SSE41-NEXT: pminud %xmm3, %xmm2
1149+
; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm2
11501150
; CHECK-SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11511151
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1152-
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5],xmm3[6,7]
1152+
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
11531153
; CHECK-SSE41-NEXT: psrld $31, %xmm0
11541154
; CHECK-SSE41-NEXT: retq
11551155
;
11561156
; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
11571157
; CHECK-AVX1: # %bb.0:
11581158
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11591159
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1160-
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1161-
; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1162-
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1163-
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4
1160+
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1161+
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1162+
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
11641163
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1165-
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
1166-
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
1167-
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,2,2]
1168-
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
1169-
; CHECK-AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
1170-
; CHECK-AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm3
1164+
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1165+
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1166+
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1167+
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1168+
; CHECK-AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2
1169+
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
11711170
; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
11721171
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11731172
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1179,12 +1178,11 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
11791178
; CHECK-AVX2: # %bb.0:
11801179
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11811180
; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1182-
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1183-
; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1184-
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
1181+
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1182+
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
11851183
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1186-
; CHECK-AVX2-NEXT: vpor %xmm4, %xmm2, %xmm2
1187-
; CHECK-AVX2-NEXT: vpminud %xmm3, %xmm2, %xmm3
1184+
; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2
1185+
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
11881186
; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
11891187
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
11901188
; CHECK-AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
@@ -1196,15 +1194,14 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
11961194
; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN:
11971195
; CHECK-AVX512VL: # %bb.0:
11981196
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1199-
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1200-
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1201-
; CHECK-AVX512VL-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1202-
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1203-
; CHECK-AVX512VL-NEXT: vpminud %xmm3, %xmm2, %xmm3
1204-
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1205-
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1206-
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1207-
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1197+
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
1198+
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
1199+
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1200+
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1201+
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1202+
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1203+
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1204+
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
12081205
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
12091206
; CHECK-AVX512VL-NEXT: retq
12101207
%srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
@@ -1263,7 +1260,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
12631260
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
12641261
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
12651262
; CHECK-SSE41-NEXT: por %xmm4, %xmm3
1266-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [858993458,306783378,0,42949672]
1263+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [858993458,306783378,1,42949672]
12671264
; CHECK-SSE41-NEXT: pminud %xmm3, %xmm2
12681265
; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm2
12691266
; CHECK-SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1362,7 +1359,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
13621359
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
13631360
; CHECK-SSE41-NEXT: psrlq $32, %xmm1
13641361
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
1365-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435454,858993458]
1362+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,858993458]
13661363
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
13671364
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
13681365
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -1447,7 +1444,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
14471444
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
14481445
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
14491446
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
1450-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435454,306783378]
1447+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,306783378]
14511448
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
14521449
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
14531450
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -1536,7 +1533,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
15361533
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
15371534
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
15381535
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
1539-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435454,42949672]
1536+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,42949672]
15401537
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
15411538
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
15421539
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -1844,7 +1841,7 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
18441841
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
18451842
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
18461843
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
1847-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435454,4294967295,858993458]
1844+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,858993458]
18481845
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
18491846
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
18501847
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -1932,7 +1929,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
19321929
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
19331930
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
19341931
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
1935-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,268435454,4294967295,306783378]
1932+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,268435455,4294967295,306783378]
19361933
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
19371934
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
19381935
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -2016,7 +2013,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
20162013
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
20172014
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
20182015
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
2019-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435454,4294967295,42949672]
2016+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,42949672]
20202017
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
20212018
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
20222019
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -2093,7 +2090,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
20932090
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
20942091
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
20952092
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2096-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435454,4294967295]
2093+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,4294967295]
20972094
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
20982095
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
20992096
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -2166,7 +2163,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
21662163
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
21672164
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
21682165
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2169-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435454,4294967295]
2166+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,4294967295]
21702167
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
21712168
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
21722169
; CHECK-SSE41-NEXT: psrld $31, %xmm0
@@ -2237,7 +2234,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
22372234
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
22382235
; CHECK-SSE2-NEXT: psrlw $8, %xmm6
22392236
; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6
2240-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm7 = [84,2,36,42,2,0,2,4,2,255,4,36,126,30,2,2]
2237+
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm7 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2]
22412238
; CHECK-SSE2-NEXT: pminub %xmm6, %xmm7
22422239
; CHECK-SSE2-NEXT: pcmpeqb %xmm6, %xmm7
22432240
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
@@ -2264,7 +2261,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
22642261
; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22652262
; CHECK-SSE2-NEXT: psrlw $8, %xmm0
22662263
; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0
2267-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [19,51,13,7,127,31,127,3,5,5,51,37,3,127,85,5]
2264+
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5]
22682265
; CHECK-SSE2-NEXT: pmaxub %xmm0, %xmm1
22692266
; CHECK-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
22702267
; CHECK-SSE2-NEXT: pcmpeqb %xmm6, %xmm3
@@ -2300,7 +2297,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
23002297
; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
23012298
; CHECK-SSE41-NEXT: psrlw $8, %xmm6
23022299
; CHECK-SSE41-NEXT: packuswb %xmm0, %xmm6
2303-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm0 = [84,2,36,42,2,0,2,4,2,255,4,36,126,30,2,2]
2300+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm0 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2]
23042301
; CHECK-SSE41-NEXT: pminub %xmm6, %xmm0
23052302
; CHECK-SSE41-NEXT: pcmpeqb %xmm6, %xmm0
23062303
; CHECK-SSE41-NEXT: pcmpeqd %xmm7, %xmm7
@@ -2326,7 +2323,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
23262323
; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23272324
; CHECK-SSE41-NEXT: psrlw $8, %xmm0
23282325
; CHECK-SSE41-NEXT: packuswb %xmm4, %xmm0
2329-
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm4 = [19,51,13,7,127,31,127,3,5,5,51,37,3,127,85,5]
2326+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm4 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5]
23302327
; CHECK-SSE41-NEXT: pmaxub %xmm0, %xmm4
23312328
; CHECK-SSE41-NEXT: pcmpeqb %xmm0, %xmm4
23322329
; CHECK-SSE41-NEXT: pcmpeqb %xmm6, %xmm3

0 commit comments

Comments
 (0)