Skip to content

Commit be9461c

Browse files
[LLVM][InstCombine][SVE] fcvtnt(a,all_active,b) != fcvtnt(undef,all_active,b) (#110278)
The "narrowing top" convert instructions leave the bottom half of active elements untouched, and thus the first parameter of their associated intrinsic remains live even when there are no inactive lanes.
1 parent 8a8e7f3 commit be9461c

File tree

2 files changed

+9
-8
lines changed

2 files changed

+9
-8
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,11 +2166,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21662166
case Intrinsic::aarch64_sve_fcvt_f64f32:
21672167
case Intrinsic::aarch64_sve_fcvtlt_f32f16:
21682168
case Intrinsic::aarch64_sve_fcvtlt_f64f32:
2169-
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2170-
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2171-
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
21722169
case Intrinsic::aarch64_sve_fcvtx_f32f64:
2173-
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
21742170
case Intrinsic::aarch64_sve_fcvtzs:
21752171
case Intrinsic::aarch64_sve_fcvtzs_i32f16:
21762172
case Intrinsic::aarch64_sve_fcvtzs_i32f64:
@@ -2192,6 +2188,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21922188
case Intrinsic::aarch64_sve_ucvtf_f32i64:
21932189
case Intrinsic::aarch64_sve_ucvtf_f64i32:
21942190
return instCombineSVEAllOrNoActiveUnary(IC, II);
2191+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2192+
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2193+
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
2194+
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
2195+
return instCombineSVENoActiveReplace(IC, II, true);
21952196
case Intrinsic::aarch64_sve_st1_scatter:
21962197
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
21972198
case Intrinsic::aarch64_sve_st1_scatter_sxtw:

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
138138
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
139139
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
140140
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
141-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
141+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[A]], <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
142142
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
143143
;
144144
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -150,7 +150,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
150150
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
151151
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
152152
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
153-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
153+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
154154
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
155155
;
156156
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -162,7 +162,7 @@ define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscal
162162
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
163163
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
164164
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
165-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
165+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
166166
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
167167
;
168168
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -186,7 +186,7 @@ define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vsca
186186
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
187187
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
188188
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
189-
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
189+
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
190190
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
191191
;
192192
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

0 commit comments

Comments
 (0)