Skip to content

Commit c69f829

Browse files
authored
[X86][StrictFP] Add widening support for STRICT_FMIN/STRICT_FMAX (#119391)
Fixes: #119422
1 parent 3057ac1 commit c69f829

File tree

2 files changed, with 112 additions and 4 deletions.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33424,15 +33424,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3342433424
case X86ISD::FMINC:
3342533425
case X86ISD::FMIN:
3342633426
case X86ISD::FMAXC:
33427-
case X86ISD::FMAX: {
33427+
case X86ISD::FMAX:
33428+
case X86ISD::STRICT_FMIN:
33429+
case X86ISD::STRICT_FMAX: {
3342833430
EVT VT = N->getValueType(0);
3342933431
assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
33432+
bool IsStrict = Opc == X86ISD::STRICT_FMIN || Opc == X86ISD::STRICT_FMAX;
3343033433
SDValue UNDEF = DAG.getUNDEF(VT);
3343133434
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
33432-
N->getOperand(0), UNDEF);
33435+
N->getOperand(IsStrict ? 1 : 0), UNDEF);
3343333436
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
33434-
N->getOperand(1), UNDEF);
33435-
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
33437+
N->getOperand(IsStrict ? 2 : 1), UNDEF);
33438+
SDValue Res;
33439+
if (IsStrict)
33440+
Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
33441+
{N->getOperand(0), LHS, RHS});
33442+
else
33443+
Res = DAG.getNode(Opc, dl, MVT::v4f32, LHS, RHS);
33444+
Results.push_back(Res);
33445+
if (IsStrict)
33446+
Results.push_back(Res.getValue(1));
3343633447
return;
3343733448
}
3343833449
case ISD::SDIV:

llvm/test/CodeGen/X86/vec-strict-cmp-128.ll

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6087,9 +6087,106 @@ define <2 x double> @test_v4f64_ogt2_s(<2 x double> %a, <2 x double> %b) #0 {
60876087
ret <2 x double> %res
60886088
}
60896089

6090+
define <2 x float> @test_v2f32_ogt2_s(<2 x float> %a, <2 x float> %b) #0 {
6091+
; SSE-32-LABEL: test_v2f32_ogt2_s:
6092+
; SSE-32: # %bb.0:
6093+
; SSE-32-NEXT: maxps %xmm1, %xmm0
6094+
; SSE-32-NEXT: retl
6095+
;
6096+
; SSE-64-LABEL: test_v2f32_ogt2_s:
6097+
; SSE-64: # %bb.0:
6098+
; SSE-64-NEXT: maxps %xmm1, %xmm0
6099+
; SSE-64-NEXT: retq
6100+
;
6101+
; AVX-32-LABEL: test_v2f32_ogt2_s:
6102+
; AVX-32: # %bb.0:
6103+
; AVX-32-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6104+
; AVX-32-NEXT: retl
6105+
;
6106+
; AVX-64-LABEL: test_v2f32_ogt2_s:
6107+
; AVX-64: # %bb.0:
6108+
; AVX-64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6109+
; AVX-64-NEXT: retq
6110+
;
6111+
; AVX512-32-LABEL: test_v2f32_ogt2_s:
6112+
; AVX512-32: # %bb.0:
6113+
; AVX512-32-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6114+
; AVX512-32-NEXT: retl
6115+
;
6116+
; AVX512-64-LABEL: test_v2f32_ogt2_s:
6117+
; AVX512-64: # %bb.0:
6118+
; AVX512-64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6119+
; AVX512-64-NEXT: retq
6120+
;
6121+
; AVX512F-32-LABEL: test_v2f32_ogt2_s:
6122+
; AVX512F-32: # %bb.0:
6123+
; AVX512F-32-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6124+
; AVX512F-32-NEXT: retl
6125+
;
6126+
; AVX512F-64-LABEL: test_v2f32_ogt2_s:
6127+
; AVX512F-64: # %bb.0:
6128+
; AVX512F-64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
6129+
; AVX512F-64-NEXT: retq
6130+
%cond = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(
6131+
<2 x float> %a, <2 x float> %b, metadata !"ogt",
6132+
metadata !"fpexcept.strict")
6133+
%res = select <2 x i1> %cond, <2 x float> %a, <2 x float> %b
6134+
ret <2 x float> %res
6135+
}
6136+
6137+
define <2 x float> @test_v2f32_ule2_s(<2 x float> %a, <2 x float> %b) #0 {
6138+
; SSE-32-LABEL: test_v2f32_ule2_s:
6139+
; SSE-32: # %bb.0:
6140+
; SSE-32-NEXT: minps %xmm0, %xmm1
6141+
; SSE-32-NEXT: movaps %xmm1, %xmm0
6142+
; SSE-32-NEXT: retl
6143+
;
6144+
; SSE-64-LABEL: test_v2f32_ule2_s:
6145+
; SSE-64: # %bb.0:
6146+
; SSE-64-NEXT: minps %xmm0, %xmm1
6147+
; SSE-64-NEXT: movaps %xmm1, %xmm0
6148+
; SSE-64-NEXT: retq
6149+
;
6150+
; AVX-32-LABEL: test_v2f32_ule2_s:
6151+
; AVX-32: # %bb.0:
6152+
; AVX-32-NEXT: vminps %xmm0, %xmm1, %xmm0
6153+
; AVX-32-NEXT: retl
6154+
;
6155+
; AVX-64-LABEL: test_v2f32_ule2_s:
6156+
; AVX-64: # %bb.0:
6157+
; AVX-64-NEXT: vminps %xmm0, %xmm1, %xmm0
6158+
; AVX-64-NEXT: retq
6159+
;
6160+
; AVX512-32-LABEL: test_v2f32_ule2_s:
6161+
; AVX512-32: # %bb.0:
6162+
; AVX512-32-NEXT: vminps %xmm0, %xmm1, %xmm0
6163+
; AVX512-32-NEXT: retl
6164+
;
6165+
; AVX512-64-LABEL: test_v2f32_ule2_s:
6166+
; AVX512-64: # %bb.0:
6167+
; AVX512-64-NEXT: vminps %xmm0, %xmm1, %xmm0
6168+
; AVX512-64-NEXT: retq
6169+
;
6170+
; AVX512F-32-LABEL: test_v2f32_ule2_s:
6171+
; AVX512F-32: # %bb.0:
6172+
; AVX512F-32-NEXT: vminps %xmm0, %xmm1, %xmm0
6173+
; AVX512F-32-NEXT: retl
6174+
;
6175+
; AVX512F-64-LABEL: test_v2f32_ule2_s:
6176+
; AVX512F-64: # %bb.0:
6177+
; AVX512F-64-NEXT: vminps %xmm0, %xmm1, %xmm0
6178+
; AVX512F-64-NEXT: retq
6179+
%cond = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(
6180+
<2 x float> %a, <2 x float> %b, metadata !"ule",
6181+
metadata !"fpexcept.strict")
6182+
%res = select <2 x i1> %cond, <2 x float> %a, <2 x float> %b
6183+
ret <2 x float> %res
6184+
}
6185+
60906186
attributes #0 = { strictfp nounwind }
60916187

60926188
declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
60936189
declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata)
6190+
declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float>, <2 x float>, metadata, metadata)
60946191
declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata)
60956192
declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata)

0 commit comments

Comments (0)