Skip to content

Commit fee2953

Browse files
authored
[ARM] Fix for undef elements from demanded elements (#70504)
I believe this is correct: the undef elements of the result should be the undef elements of the passthrough (operand 0) with the top or bottom lanes cleared, because those lanes are written from the second argument (operand 1). We don't yet attempt to look for undef elements in the second operand, but this change should fix the bug where all elements were marked as undef and the instruction was optimized away.
1 parent 2fba469 commit fee2953

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,8 +272,8 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
272272
: APInt::getHighBitsSet(2, 1));
273273
SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
274274
// The other lanes will be defined from the inserted elements.
275-
UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
276-
: APInt::getHighBitsSet(2, 1));
275+
UndefElts &= APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
276+
: APInt::getHighBitsSet(2, 1));
277277
return std::nullopt;
278278
};
279279

llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,10 @@ define <8 x half> @test_cvtnp_v8i16_bt(<8 x half> %a, <8 x half> %b, <4 x float>
243243

244244
define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
245245
; CHECK-LABEL: @test_vshrn_const(
246-
; CHECK-NEXT: ret <4 x i32> zeroinitializer
246+
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
247+
; CHECK-NEXT: [[Z:%.*]] = shufflevector <8 x i16> [[Y]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
248+
; CHECK-NEXT: [[ZA:%.*]] = zext <4 x i16> [[Z]] to <4 x i32>
249+
; CHECK-NEXT: ret <4 x i32> [[ZA]]
247250
;
248251
%y = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
249252
%z = shufflevector <8 x i16> %y, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -254,7 +257,12 @@ define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
254257
define zeroext i16 @test_undef_bits() {
255258
; CHECK-LABEL: @test_undef_bits(
256259
; CHECK-NEXT: e:
257-
; CHECK-NEXT: ret i16 0
260+
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)
261+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
262+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
263+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16>
264+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i64 0
265+
; CHECK-NEXT: ret i16 [[TMP4]]
258266
;
259267
e:
260268
%0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> zeroinitializer, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)

0 commit comments

Comments
 (0)