Skip to content

Commit 6a8464b

Browse files
authored
[X86][FP16] Do not generate X86 FMIN/FMAX for FP16 when VLX not enabled, part 2 (#143483)
Fixes: https://godbolt.org/z/eYTxeqE48
1 parent e5ff705 commit 6a8464b

File tree

2 files changed: 85 additions, 0 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47804,6 +47804,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4780447804
Cond.getOpcode() == ISD::STRICT_FSETCCS) &&
4780547805
VT.isFloatingPoint() && VT != MVT::f80 && VT != MVT::f128 &&
4780647806
!isSoftF16(VT, Subtarget) && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
47807+
((VT != MVT::v8f16 && VT != MVT::v16f16) || Subtarget.hasVLX()) &&
4780747808
(Subtarget.hasSSE2() ||
4780847809
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
4780947810
bool IsStrict = Cond->isStrictFPOpcode();

llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll renamed to llvm/test/CodeGen/X86/avx512fp16-novl.ll

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,87 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
123123
%fp16 = uitofp <8 x i16> %int16 to <8 x half>
124124
ret <8 x half> %fp16
125125
}
126+
127+
define <8 x half> @select(<8 x half> %x) {
128+
; CHECK-LABEL: select:
129+
; CHECK: # %bb.0: # %entry
130+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
131+
; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
132+
; CHECK-NEXT: vucomish %xmm1, %xmm0
133+
; CHECK-NEXT: seta %al
134+
; CHECK-NEXT: andl $1, %eax
135+
; CHECK-NEXT: kmovw %eax, %k0
136+
; CHECK-NEXT: vpsrld $16, %xmm0, %xmm2
137+
; CHECK-NEXT: vucomish %xmm1, %xmm2
138+
; CHECK-NEXT: seta %al
139+
; CHECK-NEXT: kmovd %eax, %k1
140+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
141+
; CHECK-NEXT: kshiftrw $14, %k1, %k1
142+
; CHECK-NEXT: korw %k1, %k0, %k0
143+
; CHECK-NEXT: movw $-5, %ax
144+
; CHECK-NEXT: kmovd %eax, %k1
145+
; CHECK-NEXT: kandw %k1, %k0, %k0
146+
; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
147+
; CHECK-NEXT: vucomish %xmm1, %xmm2
148+
; CHECK-NEXT: seta %al
149+
; CHECK-NEXT: kmovd %eax, %k1
150+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
151+
; CHECK-NEXT: kshiftrw $13, %k1, %k1
152+
; CHECK-NEXT: korw %k1, %k0, %k0
153+
; CHECK-NEXT: movw $-9, %ax
154+
; CHECK-NEXT: kmovd %eax, %k1
155+
; CHECK-NEXT: kandw %k1, %k0, %k0
156+
; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm2
157+
; CHECK-NEXT: vucomish %xmm1, %xmm2
158+
; CHECK-NEXT: seta %al
159+
; CHECK-NEXT: kmovd %eax, %k1
160+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
161+
; CHECK-NEXT: kshiftrw $12, %k1, %k1
162+
; CHECK-NEXT: korw %k1, %k0, %k0
163+
; CHECK-NEXT: movw $-17, %ax
164+
; CHECK-NEXT: kmovd %eax, %k1
165+
; CHECK-NEXT: kandw %k1, %k0, %k0
166+
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
167+
; CHECK-NEXT: vucomish %xmm1, %xmm2
168+
; CHECK-NEXT: seta %al
169+
; CHECK-NEXT: kmovd %eax, %k1
170+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
171+
; CHECK-NEXT: kshiftrw $11, %k1, %k1
172+
; CHECK-NEXT: korw %k1, %k0, %k0
173+
; CHECK-NEXT: movw $-33, %ax
174+
; CHECK-NEXT: kmovd %eax, %k1
175+
; CHECK-NEXT: kandw %k1, %k0, %k0
176+
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
177+
; CHECK-NEXT: vucomish %xmm1, %xmm2
178+
; CHECK-NEXT: seta %al
179+
; CHECK-NEXT: kmovd %eax, %k1
180+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
181+
; CHECK-NEXT: kshiftrw $10, %k1, %k1
182+
; CHECK-NEXT: korw %k1, %k0, %k0
183+
; CHECK-NEXT: movw $-65, %ax
184+
; CHECK-NEXT: kmovd %eax, %k1
185+
; CHECK-NEXT: kandw %k1, %k0, %k0
186+
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
187+
; CHECK-NEXT: vucomish %xmm1, %xmm2
188+
; CHECK-NEXT: seta %al
189+
; CHECK-NEXT: kmovd %eax, %k1
190+
; CHECK-NEXT: kshiftlw $6, %k1, %k1
191+
; CHECK-NEXT: korw %k1, %k0, %k0
192+
; CHECK-NEXT: kshiftlw $9, %k0, %k0
193+
; CHECK-NEXT: kshiftrw $9, %k0, %k0
194+
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
195+
; CHECK-NEXT: vucomish %xmm1, %xmm2
196+
; CHECK-NEXT: seta %al
197+
; CHECK-NEXT: kmovd %eax, %k1
198+
; CHECK-NEXT: kshiftlw $7, %k1, %k1
199+
; CHECK-NEXT: korw %k1, %k0, %k1
200+
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
201+
; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
202+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
203+
; CHECK-NEXT: vzeroupper
204+
; CHECK-NEXT: retq
205+
entry:
206+
%c = fcmp ogt <8 x half> %x, splat (half 0xH3C00)
207+
%s = select <8 x i1> %c, <8 x half> splat (half 0xH3C00), <8 x half> %x
208+
ret <8 x half> %s
209+
}

0 commit comments

Comments
 (0)