Skip to content

Commit 5bbbaa1

Browse files
[LLVM][Instcombine][SVE] fcvtnt(a,all_active,b) != fcvtnt(undef,all_active,b)
The "narrowing top" convert instructions leave the bottom half of active elements untouched, and thus the first parameter of their associated intrinsic remains live even when there are no inactive lanes.
1 parent 3e3780e commit 5bbbaa1

File tree

2 files changed

+9
-8
lines changed

2 files changed

+9
-8
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,11 +2156,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21562156
case Intrinsic::aarch64_sve_fcvt_f64f32:
21572157
case Intrinsic::aarch64_sve_fcvtlt_f32f16:
21582158
case Intrinsic::aarch64_sve_fcvtlt_f64f32:
2159-
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2160-
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2161-
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
21622159
case Intrinsic::aarch64_sve_fcvtx_f32f64:
2163-
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
21642160
case Intrinsic::aarch64_sve_fcvtzs:
21652161
case Intrinsic::aarch64_sve_fcvtzs_i32f16:
21662162
case Intrinsic::aarch64_sve_fcvtzs_i32f64:
@@ -2182,6 +2178,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21822178
case Intrinsic::aarch64_sve_ucvtf_f32i64:
21832179
case Intrinsic::aarch64_sve_ucvtf_f64i32:
21842180
return instCombineSVEAllOrNoActiveUnary(IC, II);
2181+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2182+
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2183+
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
2184+
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
2185+
return instCombineSVENoActiveReplace(IC, II, true);
21852186
case Intrinsic::aarch64_sve_st1_scatter:
21862187
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
21872188
case Intrinsic::aarch64_sve_st1_scatter_sxtw:

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
138138
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
139139
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
140140
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
141-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
141+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[A]], <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
142142
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
143143
;
144144
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -150,7 +150,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
150150
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
151151
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
152152
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
153-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
153+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
154154
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
155155
;
156156
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -162,7 +162,7 @@ define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscal
162162
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
163163
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
164164
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
165-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
165+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
166166
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
167167
;
168168
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -186,7 +186,7 @@ define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vsca
186186
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
187187
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
188188
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
189-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
189+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
190190
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
191191
;
192192
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

0 commit comments

Comments
 (0)