Skip to content

Commit 51ab757

Browse files
committed
[x86] autoupgrade and remove SSE2/SSE41 integer min/max intrinsics
Follow-up to: http://reviews.llvm.org/rL272806 http://reviews.llvm.org/rL272807 llvm-svn: 272907
1 parent 8e3c742 commit 51ab757

File tree

5 files changed

+177
-59
lines changed

5 files changed

+177
-59
lines changed

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -406,18 +406,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
406406
def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
407407
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
408408
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
409-
def int_x86_sse2_pmaxu_b :
410-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
411-
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
412-
def int_x86_sse2_pmaxs_w :
413-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
414-
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
415-
def int_x86_sse2_pminu_b :
416-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
417-
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
418-
def int_x86_sse2_pmins_w :
419-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
420-
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
421409
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
422410
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
423411
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
@@ -735,34 +723,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
735723
[IntrNoMem]>;
736724
}
737725

738-
// Vector compare, min, max
739-
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
740-
def int_x86_sse41_pmaxsb :
741-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
742-
[IntrNoMem, Commutative]>;
743-
def int_x86_sse41_pmaxsd :
744-
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
745-
[IntrNoMem, Commutative]>;
746-
def int_x86_sse41_pmaxud :
747-
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
748-
[IntrNoMem, Commutative]>;
749-
def int_x86_sse41_pmaxuw :
750-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
751-
[IntrNoMem, Commutative]>;
752-
def int_x86_sse41_pminsb :
753-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
754-
[IntrNoMem, Commutative]>;
755-
def int_x86_sse41_pminsd :
756-
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
757-
[IntrNoMem, Commutative]>;
758-
def int_x86_sse41_pminud :
759-
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
760-
[IntrNoMem, Commutative]>;
761-
def int_x86_sse41_pminuw :
762-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
763-
[IntrNoMem, Commutative]>;
764-
}
765-
766726
// Advanced Encryption Standard (AES) Instructions
767727
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
768728
def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,18 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
174174
Name.startswith("x86.sse2.pcmpgt.") ||
175175
Name.startswith("x86.avx2.pcmpeq.") ||
176176
Name.startswith("x86.avx2.pcmpgt.") ||
177+
Name == "x86.sse41.pmaxsb" ||
178+
Name == "x86.sse2.pmaxs.w" ||
179+
Name == "x86.sse41.pmaxsd" ||
180+
Name == "x86.sse2.pmaxu.b" ||
181+
Name == "x86.sse41.pmaxuw" ||
182+
Name == "x86.sse41.pmaxud" ||
183+
Name == "x86.sse41.pminsb" ||
184+
Name == "x86.sse2.pmins.w" ||
185+
Name == "x86.sse41.pminsd" ||
186+
Name == "x86.sse2.pminu.b" ||
187+
Name == "x86.sse41.pminuw" ||
188+
Name == "x86.sse41.pminud" ||
177189
Name.startswith("x86.avx2.vbroadcast") ||
178190
Name.startswith("x86.avx2.pbroadcast") ||
179191
Name.startswith("x86.avx.vpermil.") ||
@@ -518,6 +530,14 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C,
518530
return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
519531
}
520532

/// Replace a two-operand integer min/max intrinsic call with generic IR.
/// Emits an icmp of the call's first two argument operands using \p Pred,
/// then a select that yields operand 0 when the comparison is true and
/// operand 1 otherwise. The callers in this file pass ICMP_SGT/ICMP_UGT for
/// the pmax* intrinsics and ICMP_SLT/ICMP_ULT for the pmin* intrinsics.
/// \returns the select value; the original call \p CI is not modified here.
533+
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
534+
ICmpInst::Predicate Pred) {
535+
Value *Op0 = CI.getArgOperand(0);
536+
Value *Op1 = CI.getArgOperand(1);
537+
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
538+
return Builder.CreateSelect(Cmp, Op0, Op1);
539+
}
540+
521541
/// Upgrade a call to an old intrinsic. All argument and return casting must be
522542
/// provided to seamlessly integrate with existing context.
523543
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -544,6 +564,22 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
544564
Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
545565
"pcmpgt");
546566
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
567+
} else if (Name == "llvm.x86.sse41.pmaxsb" ||
568+
Name == "llvm.x86.sse2.pmaxs.w" ||
569+
Name == "llvm.x86.sse41.pmaxsd") {
570+
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
571+
} else if (Name == "llvm.x86.sse2.pmaxu.b" ||
572+
Name == "llvm.x86.sse41.pmaxuw" ||
573+
Name == "llvm.x86.sse41.pmaxud") {
574+
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
575+
} else if (Name == "llvm.x86.sse41.pminsb" ||
576+
Name == "llvm.x86.sse2.pmins.w" ||
577+
Name == "llvm.x86.sse41.pminsd") {
578+
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
579+
} else if (Name == "llvm.x86.sse2.pminu.b" ||
580+
Name == "llvm.x86.sse41.pminuw" ||
581+
Name == "llvm.x86.sse41.pminud") {
582+
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
547583
} else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
548584
Name == "llvm.x86.sse2.cvtps2pd" ||
549585
Name == "llvm.x86.avx.cvtdq2.pd.256" ||

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2104,10 +2104,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
21042104
X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
21052105
X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
21062106
X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
2107-
X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
2108-
X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
2109-
X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
2110-
X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
21112107
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
21122108
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
21132109
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
@@ -2146,14 +2142,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
21462142
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
21472143
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
21482144
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
2149-
X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0),
2150-
X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0),
2151-
X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0),
2152-
X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0),
2153-
X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0),
2154-
X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
2155-
X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
2156-
X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
21572145
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
21582146
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
21592147
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),

llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
144144
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
145145
; CHECK-NEXT: retl
146146
entry:
147-
%res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
148-
ret <4 x i32> %res
147+
%res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
148+
ret <4 x i32> %res
149149
}
150150
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
151151

@@ -155,8 +155,8 @@ define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
155155
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
156156
; CHECK-NEXT: retl
157157
entry:
158-
%res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
159-
ret <8 x i16> %res
158+
%res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
159+
ret <8 x i16> %res
160160
}
161161
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
162162

@@ -166,7 +166,52 @@ define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
166166
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
167167
; CHECK-NEXT: retl
168168
entry:
169-
%res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
170-
ret <8 x i16> %res
169+
%res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
170+
ret <8 x i16> %res
171171
}
172172
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
173+
174+
define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
175+
; CHECK-LABEL: max_epu8:
176+
; CHECK: ## BB#0:
177+
; CHECK-NEXT: pmaxub %xmm1, %xmm0
178+
; CHECK-NEXT: retl
179+
;
180+
%res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
181+
ret <16 x i8> %res
182+
}
183+
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
184+
185+
define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
186+
; CHECK-LABEL: min_epu8:
187+
; CHECK: ## BB#0:
188+
; CHECK-NEXT: pminub %xmm1, %xmm0
189+
; CHECK-NEXT: retl
190+
;
191+
%res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
192+
ret <16 x i8> %res
193+
}
194+
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
195+
196+
define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
197+
; CHECK-LABEL: max_epi16:
198+
; CHECK: ## BB#0:
199+
; CHECK-NEXT: pmaxsw %xmm1, %xmm0
200+
; CHECK-NEXT: retl
201+
;
202+
%res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
203+
ret <8 x i16> %res
204+
}
205+
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
206+
207+
define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
208+
; CHECK-LABEL: min_epi16:
209+
; CHECK: ## BB#0:
210+
; CHECK-NEXT: pminsw %xmm1, %xmm0
211+
; CHECK-NEXT: retl
212+
;
213+
%res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
214+
ret <8 x i16> %res
215+
}
216+
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
217+

llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse4.1 | FileCheck %s
33

44
; This test works just like the non-upgrade one except that it only checks
@@ -211,3 +211,92 @@ define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
211211
ret <2 x i64> %res
212212
}
213213
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
214+
215+
define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
216+
; CHECK-LABEL: max_epi8:
217+
; CHECK: ## BB#0:
218+
; CHECK-NEXT: pmaxsb %xmm1, %xmm0
219+
; CHECK-NEXT: retl
220+
;
221+
%res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
222+
ret <16 x i8> %res
223+
}
224+
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
225+
226+
define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
227+
; CHECK-LABEL: min_epi8:
228+
; CHECK: ## BB#0:
229+
; CHECK-NEXT: pminsb %xmm1, %xmm0
230+
; CHECK-NEXT: retl
231+
;
232+
%res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
233+
ret <16 x i8> %res
234+
}
235+
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
236+
237+
define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
238+
; CHECK-LABEL: max_epu16:
239+
; CHECK: ## BB#0:
240+
; CHECK-NEXT: pmaxuw %xmm1, %xmm0
241+
; CHECK-NEXT: retl
242+
;
243+
%res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
244+
ret <8 x i16> %res
245+
}
246+
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
247+
248+
define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
249+
; CHECK-LABEL: min_epu16:
250+
; CHECK: ## BB#0:
251+
; CHECK-NEXT: pminuw %xmm1, %xmm0
252+
; CHECK-NEXT: retl
253+
;
254+
%res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
255+
ret <8 x i16> %res
256+
}
257+
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
258+
259+
define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
260+
; CHECK-LABEL: max_epi32:
261+
; CHECK: ## BB#0:
262+
; CHECK-NEXT: pmaxsd %xmm1, %xmm0
263+
; CHECK-NEXT: retl
264+
;
265+
%res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
266+
ret <4 x i32> %res
267+
}
268+
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
269+
270+
define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
271+
; CHECK-LABEL: min_epi32:
272+
; CHECK: ## BB#0:
273+
; CHECK-NEXT: pminsd %xmm1, %xmm0
274+
; CHECK-NEXT: retl
275+
;
276+
%res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
277+
ret <4 x i32> %res
278+
}
279+
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
280+
281+
define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
282+
; CHECK-LABEL: max_epu32:
283+
; CHECK: ## BB#0:
284+
; CHECK-NEXT: pmaxud %xmm1, %xmm0
285+
; CHECK-NEXT: retl
286+
;
287+
%res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
288+
ret <4 x i32> %res
289+
}
290+
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
291+
292+
define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
293+
; CHECK-LABEL: min_epu32:
294+
; CHECK: ## BB#0:
295+
; CHECK-NEXT: pminud %xmm1, %xmm0
296+
; CHECK-NEXT: retl
297+
;
298+
%res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
299+
ret <4 x i32> %res
300+
}
301+
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
302+

0 commit comments

Comments
 (0)