Skip to content

[LLVM][DAGCombine] Remove combiner-vector-fcopysign-extend-round. #129878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,6 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));

static cl::opt<bool> EnableVectorFCopySignExtendRound(
"combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
cl::desc(
"Enable merging extends and rounds into FCOPYSIGN on vector types"));
namespace {

class DAGCombiner {
Expand Down Expand Up @@ -18011,7 +18007,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
if (YTy == MVT::f128)
return false;

return !YTy.isVector() || EnableVectorFCopySignExtendRound;
// Avoid mismatched vector operand types, for better instruction selection.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code comment on the line above is the original comment, which I've pulled from https://reviews.llvm.org/D128642.

return !YTy.isVector();
}

static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
Expand Down
163 changes: 47 additions & 116 deletions llvm/test/CodeGen/AArch64/sve-fcopysign.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

;============ v2f32
Expand Down Expand Up @@ -47,32 +46,16 @@ define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a,

; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT: and z2.s, z2.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
; CHECK-EXTEND-ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT: fcvt z2.s, p0/m, z2.d
; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
; CHECK-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
%r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
ret <vscale x 4 x float> %r
Expand Down Expand Up @@ -177,32 +160,16 @@ define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <v
}

define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
; CHECK-EXTEND-ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_v4f16_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-NEXT: fcvt z2.h, p0/m, z2.d
; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
; CHECK-NEXT: and z1.h, z1.h, #0x8000
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
%r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
ret <vscale x 4 x half> %r
Expand All @@ -224,32 +191,16 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <v
}

define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.s, z0.h
; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-EXTEND-ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_v8f16_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-NEXT: fcvt z2.h, p0/m, z2.s
; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: and z1.h, z1.h, #0x8000
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
ret <vscale x 8 x half> %r
Expand All @@ -259,48 +210,28 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <v
;========== FCOPYSIGN_EXTEND_ROUND

define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: ret
%t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
%t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
ret <vscale x 4 x half> %t2
}

define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: ret
%t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
%t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
ret <vscale x 2 x float> %t2
Expand Down
44 changes: 14 additions & 30 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

Expand Down Expand Up @@ -436,30 +433,17 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {

; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_NO_EXTEND_ROUND: // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK_NO_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
; CHECK_NO_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
; CHECK_NO_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT: ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_EXTEND_ROUND: // %bb.0:
; CHECK_EXTEND_ROUND-NEXT: ldr q0, [x1]
; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
; CHECK_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
; CHECK_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
; CHECK_EXTEND_ROUND-NEXT: ret
; CHECK-LABEL: test_copysign_v4f64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
; CHECK-NEXT: and z0.d, z0.d, #0x8000000000000000
; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%a = load <4 x double>, ptr %ap
%b = load <4 x float>, ptr %bp
%tmp0 = fpext <4 x float> %b to <4 x double>
Expand Down
Loading