[LLVM][DAGCombine] Remove combiner-vector-fcopysign-extend-round. #129878
Conversation
This option was added to improve test coverage for SVE lowering code that is impossible to reach otherwise. Given that it is not possible to trigger a bug without it, and the generated code is universally worse with it, I figure the option has no value and should be removed.
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

This option was added to improve test coverage for SVE lowering code that is impossible to reach otherwise. Given that it is not possible to trigger a bug without it, and the generated code is universally worse with it, I figure the option has no value and should be removed.

Patch is 25.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129878.diff

5 Files Affected:
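For context, the fold this option guarded rewrites a copysign whose sign operand is an extended or rounded value to use the pre-conversion value directly, which is sound because copysign reads only the sign bit. A minimal LLVM IR sketch of the scalar case, where the fold remains enabled after this patch (the function name is illustrative, not taken from the patch):

; Scalar case: %t feeds only the sign operand of copysign, which reads just
; the sign bit, so the DAG combiner can drop the fptrunc and take the sign
; from %b directly, producing an FCOPYSIGN with mismatched operand types.
define float @scalar_copysign_round(float %a, double %b) {
  %t = fptrunc double %b to float
  %r = call float @llvm.copysign.f32(float %a, float %t)
  ret float %r
}
declare float @llvm.copysign.f32(float, float)

The vector form of the same pattern appears in test_copysign_v4f32_v4f64 below; with the option removed the conversions stay explicit, and the updated checks show the resulting SVE code is shorter.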
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e17897cf60b0..ef5f2210573e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -149,10 +149,6 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
     cl::desc("DAG combiner enable load/<replace bytes>/store with "
              "a narrower store"));
 
-static cl::opt<bool> EnableVectorFCopySignExtendRound(
-    "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
-    cl::desc(
-        "Enable merging extends and rounds into FCOPYSIGN on vector types"));
 
 namespace {
 
 class DAGCombiner {
@@ -18011,7 +18007,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
   if (YTy == MVT::f128)
     return false;
 
-  return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+  // Avoid mismatched vector operand types, for better instruction selection.
+  return !YTy.isVector();
 }
 
 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
index 78843e392e536..96056db2a4f2d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
;============ v2f32
@@ -47,32 +46,16 @@ define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a,
; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
-; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
-; CHECK-NO-EXTEND-ROUND: // %bb.0:
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
-; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: ret
-;
-; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
-; CHECK-EXTEND-ROUND: // %bb.0:
-; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
-; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff
-; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-EXTEND-ROUND-NEXT: and z2.s, z2.s, #0x80000000
-; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
-; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-EXTEND-ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT: fcvt z2.s, p0/m, z2.d
+; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
%r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
ret <vscale x 4 x float> %r
@@ -177,32 +160,16 @@ define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <v
}
define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
-; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
-; CHECK-NO-EXTEND-ROUND: // %bb.0:
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
-; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: ret
-;
-; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
-; CHECK-EXTEND-ROUND: // %bb.0:
-; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
-; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
-; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
-; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
-; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
-; CHECK-EXTEND-ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v4f16_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z0.h, z0.h, #0x7fff
+; CHECK-NEXT: fcvt z2.h, p0/m, z2.d
+; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
+; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT: and z1.h, z1.h, #0x8000
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
%r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
ret <vscale x 4 x half> %r
@@ -224,32 +191,16 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <v
}
define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
-; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
-; CHECK-NO-EXTEND-ROUND: // %bb.0:
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
-; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.h, z1.h, z2.h
-; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: ret
-;
-; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
-; CHECK-EXTEND-ROUND: // %bb.0:
-; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
-; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.s, z0.h
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h
-; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
-; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
-; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
-; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.h, z0.h, z2.h
-; CHECK-EXTEND-ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v8f16_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: and z0.h, z0.h, #0x7fff
+; CHECK-NEXT: fcvt z2.h, p0/m, z2.s
+; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
+; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT: and z1.h, z1.h, #0x8000
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
ret <vscale x 8 x half> %r
@@ -259,48 +210,28 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <v
;========== FCOPYSIGN_EXTEND_ROUND
define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
-; CHECK-NO-EXTEND-ROUND: // %bb.0:
-; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
-; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
-; CHECK-NO-EXTEND-ROUND-NEXT: ret
-;
-; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
-; CHECK-EXTEND-ROUND: // %bb.0:
-; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
-; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
-; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-EXTEND-ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
%t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
%t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
ret <vscale x 4 x half> %t2
}
define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
-; CHECK-NO-EXTEND-ROUND: // %bb.0:
-; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
-; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
-; CHECK-NO-EXTEND-ROUND-NEXT: ret
-;
-; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
-; CHECK-EXTEND-ROUND: // %bb.0:
-; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
-; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
-; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-EXTEND-ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
+; CHECK-NEXT: ret
%t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
%t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
ret <vscale x 2 x float> %t2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
index e77cd9ef55eaf..37450431d8a11 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
-; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
-; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
-; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
-; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
-; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -436,30 +433,17 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
-; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
-; CHECK_NO_EXTEND_ROUND: // %bb.0:
-; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
-; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z0.d }, p0/z, [x1]
-; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
-; CHECK_NO_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
-; CHECK_NO_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
-; CHECK_NO_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
-; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
-; CHECK_NO_EXTEND_ROUND-NEXT: ret
-;
-; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
-; CHECK_EXTEND_ROUND: // %bb.0:
-; CHECK_EXTEND_ROUND-NEXT: ldr q0, [x1]
-; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
-; CHECK_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
-; CHECK_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
-; CHECK_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
-; CHECK_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
-; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
-; CHECK_EXTEND_ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
+; CHECK-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
+; CHECK-NEXT: and z0.d, z0.d, #0x8000000000000000
+; CHECK-NEXT: orr z0.d, z1.d, z0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
%a = load <4 x double>, ptr %ap
%b = load <4 x float>, ptr %bp
%tmp0 = fpext <4 x float> %b to <4 x double>
diff --git a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
index 778d7e193e23e..a7fa9e7575df6 100644
--- a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck --check-prefixes=CHECK,CHECK_NO_EXTEND_ROUND %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK_EXTEND_ROUND %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -45,28 +44,15 @@ define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a,
; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
-; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
-; CHECK_NO_EXTEND_ROUND: // %bb.0:
-; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_NO_EXTEND_ROUND-NEXT: mov z3.s, #0x7fffffff
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.s, p0/m, z2.d
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK_NO_EXTEND_ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d
-; CHECK_NO_EXTEND_ROUND-NEXT: ret
-;
-; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
-; CHECK_EXTEND_ROUND: // %bb.0:
-; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: mov z4.s, #0x7fffffff
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: fcvt z2.s, p0/m, z2.d
-; CHECK_EXTEND_ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d
-; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z3.s
-; CHECK_EXTEND_ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z3.s, #0x7fffffff
+; CHECK-NEXT: fcvt z2.s, p0/m, z2.d
+; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z3.d
+; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
%r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
ret <vscale x 4 x float> %r
@@ -105,29 +91,17 @@ declare <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vs
; SplitVecRes mismatched
define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
-; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
-; CHECK_NO_EXTEND_ROUND: // %bb.0:
-; CHECK_NO_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_NO_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d
-; CHECK_NO_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d
-; CHECK_NO_EXTEND_ROUND-NEXT: ret
-;
-; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
-; CHECK_EXTEND_ROUND: // %bb.0:
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s
-; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff
-; CHECK_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s
-; CHECK_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d
-; CHECK_EXTEND_ROUND-NEXT: ret
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z3.d, z2.s
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff
+; CHECK-NEXT: fcvt z3.d, p0/m, z3.s
+; CHECK-NEXT: fcvt z2.d, p0/m, z2.s
+; CHECK-NEXT: bsl z0.d, z0.d, z2.d, z4.d
+; CHECK-NEXT: bsl z1.d, z1.d, z3.d, z4.d
+; CHECK-NEXT: ret
%tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
%r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
ret <vscale x 4 x double> %r
@@ -173,28 +147,15 @@ define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <v
}
define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
-; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
-; CHECK_NO_EXTEND_ROUND: // %bb.0:
-; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_NO_EXTEND_ROUND-NEXT: mov z3.h, #32767 // =0x7fff
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.d
-; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.d
-; CHECK_NO_EXTEND_ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d
-; CHECK_NO_EXTEND_ROUND-NEXT: ret
-;
-; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
-; CHECK_EXTEND_ROUND: // %bb.0:
-; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: mov z4.h, #32767 // =0x7fff
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.d
-; CHECK_EXTEND_...
[truncated]
Review comment on llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:

@@ -18011,7 +18007,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
   if (YTy == MVT::f128)
     return false;
 
-  return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+  // Avoid mismatched vector operand types, for better instruction selection.
This was the original comment that I've pulled from https://reviews.llvm.org/D128642.