[LegalizeVectorOps] Enable ExpandFABS/COPYSIGN to use integer ops for fixed vectors in some cases. #109232
Conversation
[LegalizeVectorOps] Enable ExpandFABS/COPYSIGN to use integer ops for fixed vectors in some cases. Copy the same FSUB check from ExpandFNEG to avoid breaking AArch64 and ARM.
@llvm/pr-subscribers-backend-nvptx @llvm/pr-subscribers-backend-webassembly

Author: Craig Topper (topperc)

Changes

Copy the same FSUB check from ExpandFNEG to avoid breaking AArch64 and ARM.

Patch is 132.62 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/109232.diff

7 Files Affected:
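Before the diff, it may help to see the bit-level identities the expansion relies on: FNEG flips the sign bit with XOR, FABS clears it with AND, and FCOPYSIGN splices one value's sign bit onto another's magnitude with AND/AND/OR. A minimal scalar C++ sketch (illustrative only; float and uint32_t stand in for the vector lanes, and none of these helpers appear in the patch):

```cpp
// Scalar versions of the integer expansions; the patch applies the same
// masks lane-wise on fixed-length vectors once the bitwise op is legal.
#include <bit>
#include <cstdint>
#include <cstdio>

static float fneg_bits(float x) {
  // Flip the sign bit.
  return std::bit_cast<float>(std::bit_cast<uint32_t>(x) ^ 0x80000000u);
}

static float fabs_bits(float x) {
  // Clear the sign bit.
  return std::bit_cast<float>(std::bit_cast<uint32_t>(x) & 0x7FFFFFFFu);
}

static float copysign_bits(float mag, float sgn) {
  uint32_t m = std::bit_cast<uint32_t>(mag) & 0x7FFFFFFFu; // magnitude bits
  uint32_t s = std::bit_cast<uint32_t>(sgn) & 0x80000000u; // sign bit
  return std::bit_cast<float>(m | s);
}

int main() {
  // Prints: -1.500000 2.000000 -3.000000
  std::printf("%f %f %f\n", fneg_bits(1.5f), fabs_bits(-2.0f),
              copysign_bits(3.0f, -1.0f));
}
```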
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5d433204d5da08..838e5c78d5d782 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1804,9 +1804,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
EVT VT = Node->getValueType(0);
EVT IntVT = VT.changeVectorElementTypeToInteger();
+ if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
+ return SDValue();
+
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
- if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
- !(TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) || VT.isScalableVector()))
+ if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+ !VT.isScalableVector())
return SDValue();
SDLoc DL(Node);
@@ -1821,8 +1824,12 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
EVT VT = Node->getValueType(0);
EVT IntVT = VT.changeVectorElementTypeToInteger();
- // FIXME: We shouldn't restrict this to scalable vectors.
- if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
+ if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
+ return SDValue();
+
+ // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+ if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+ !VT.isScalableVector())
return SDValue();
SDLoc DL(Node);
@@ -1840,7 +1847,12 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
// FIXME: We shouldn't restrict this to scalable vectors.
if (VT != Node->getOperand(1).getValueType() ||
!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
+ !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
+ return SDValue();
+
+ // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+ if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+ !VT.isScalableVector())
return SDValue();
SDLoc DL(Node);
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 7030e5435f723e..8d40a9ef54dca9 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -508,21 +508,24 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
; CHECK-LABEL: test_copysign(
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
-; CHECK-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
-; CHECK-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
-; CHECK-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
-; CHECK-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
-; CHECK-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
-; CHECK-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
-; CHECK-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
-; CHECK-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
-; CHECK-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
-; CHECK-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
-; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
+; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; SM80-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; SM80-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
+; SM80-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
+; SM80-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
+; SM80-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
+; SM80-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
+; SM80-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
+; SM80-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
+; SM80-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
+; SM80-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
+; SM80-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
+; SM80-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
+; SM80-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
+; SM80-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
+; SM90-DAG: and.b32 [[R1:%r[0-9]+]], [[B]], -2147450880;
+; SM90-DAG: and.b32 [[R2:%r[0-9]+]], [[A]], 2147450879;
+; SM90-DAG: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 464b3a754804fe..b41f63b783d390 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1184,14 +1184,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
; CHECK-LABEL: test_fabs(
; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
-; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-NOF16-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
+; CHECK-NOF16-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fabs(<2 x half> %a) #0 {
@@ -1244,15 +1245,18 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_copysign(
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
-; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
+; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
+; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
+; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
+; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]]
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
@@ -1263,18 +1267,24 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_copysign_f32(
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
-; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
-; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
-; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
-; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
-; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
+; CHECK-NOF16-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
+; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-NOF16-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
+; CHECK-NOF16-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
+; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
+; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
+; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-F16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[B1]];
+; CHECK-F16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[B0]];
+; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
+; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
+; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
+; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
@@ -1286,20 +1296,26 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_f64(
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
-; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
-; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
-; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
-; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
-; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
-; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
-; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
+; CHECK-NOF16-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
+; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-NOF16-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
+; CHECK-NOF16-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
+; CHECK-NOF16-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
+; CHECK-NOF16-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
+; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
+; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
+; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-F16-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[B1]];
+; CHECK-F16-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[B0]];
+; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
+; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
+; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
+; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
@@ -1311,16 +1327,22 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_extended(
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0];
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
-; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
-; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
+; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
+; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
+; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
+; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
+; CHECK-F16-DAG: or.b32 [[R2:%r[0-9]+]], [[R1]], [[R0]]
+; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]]
+; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]]
+; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]]
; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
; CHECK: ret;
define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
diff --git a/llvm/test/CodeGen/PowerPC/vec_abs.ll b/llvm/test/CodeGen/PowerPC/vec_abs.ll
index 50dcfc3faf62e9..b900f0ea29c4a6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_abs.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_abs.ll
@@ -19,10 +19,9 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
; CHECK: xvabssp
; CHECK: blr
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
+; CHECK-NOVSX: vspltisb
+; CHECK-NOVSX: vslw
+; CHECK-NOVSX: vandc
; CHECK-NOVSX: blr
define <4 x float> @test2_float(<4 x float> %aa) #0 {
@@ -40,11 +39,8 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 {
; CHECK: xvnabssp
; CHECK: blr
; CHECK-NOVSX: vspltisb
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: fabs
-; CHECK-NOVSX: vxor
+; CHECK-NOVSX: vslw
+; CHECK-NOVSX: vor
; CHECK-NOVSX: blr
define <2 x double> @test_double(<2 x double> %aa) #0 {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 5ab8eab091c2e4..69faf269ae3db6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -508,101 +508,15 @@ define void @fabs_v8f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fabs_v8f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV32-NEXT: lui a3, 8
-; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT: addi a3, a3, -1
-; ZVFHMIN-RV32-NEXT: and a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT: and a1, a1, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT: and a4, a4, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: and a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: and a1, a1, a3
-; ZVFHMIN-RV32-NEXT: and a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT: and a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: and a1, a1, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fabs_v8f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV64-NEXT: lui a3, 8
-; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1
-; ZVFHMIN-RV64-NEXT: and a2, a2, a3
-; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT: and a1, a1, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV64-NEXT: and a4, a4, a3
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
-; ZVF...
[truncated]
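A note on the magic constants in the NVPTX CHECK lines above: 2147450879 is 0x7FFF7FFF and -2147450880 is 0x80008000, i.e. the f16 magnitude and sign masks replicated across both packed lanes so a single 32-bit AND/AND/OR handles a whole <2 x half> value. A runnable sketch under the assumption that lanes can be modeled as raw 16-bit payloads rather than a real half type:

```cpp
// Two-lane f16 copysign with one 32-bit op per mask, mirroring the
// SM90/CHECK-F16 output above (same constants as in the checks).
#include <cstdint>
#include <cstdio>

// Hypothetical helper: pack two 16-bit lane payloads into one register.
static uint32_t pack2(uint16_t lo, uint16_t hi) {
  return uint32_t(lo) | (uint32_t(hi) << 16);
}

static uint32_t copysign_2xf16(uint32_t mag, uint32_t sgn) {
  uint32_t m = mag & 0x7FFF7FFFu; // 2147450879: clear both sign bits
  uint32_t s = sgn & 0x80008000u; // -2147450880 as i32: keep both sign bits
  return m | s;                   // splice the sign lanes onto the magnitudes
}

int main() {
  // f16 payloads: 1.0 = 0x3C00, 2.0 = 0x4000, -2.0 = 0xC000.
  uint32_t a = pack2(0x3C00, 0x3C00); // <1.0, 1.0>
  uint32_t b = pack2(0xC000, 0x4000); // <-2.0, 2.0>
  // Lane 0 takes a negative sign, lane 1 stays positive: <-1.0, 1.0>.
  std::printf("0x%08X\n", copysign_2xf16(a, b)); // prints 0x3C00BC00
}
```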
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
This still has the unwanted conversions, I assume from type legalization.
if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
  return SDValue();

// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
Can you really not just delete this FIXME now? There are no more fsubs emitted.
It's a proxy for FP arithmetic not being supported. AArch64 has v1f64 as a legal type but Expands all operations on it. Using v1i64 for FNEG while scalarizing the other arithmetic causes obvious regressions in tests.
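A toy model of that trade-off may make the probe clearer; everything here is hypothetical (an enum standing in for LLVM's operation actions, not its real API):

```cpp
// Models the decision described above: v1f64 is a legal *type* on AArch64,
// but FP arithmetic on it is Expand'ed, so FSUB legality serves as a proxy
// for "this target really does FP math at this vector type".
#include <cstdio>

enum Action { Legal, Expand };

struct Case { const char *vt; Action fsub; };

int main() {
  const Case cases[] = {
      {"v2f64", Legal},  // real SIMD FP math: use the integer bitmask
      {"v1f64", Expand}, // legal type, expanded FP ops: unroll instead
  };
  for (const Case &c : cases)
    std::printf("%s: %s\n", c.vt,
                c.fsub == Legal ? "expand FABS via integer AND"
                                : "scalarize/unroll to avoid regressions");
}
```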
Ping
[LegalizeVectorOps] Enable ExpandFABS/COPYSIGN to use integer ops for fixed vectors in some cases. (llvm#109232) Copy the same FSUB check from ExpandFNEG to avoid breaking AArch64 and ARM.