Skip to content

Commit 771fd1a

Browse files
authored
[DAG] Extend input types if needed in combineShiftToAVG. (#76791)
This attempts to fix #76734, which is a crash caused by invalid TRUNC node types from unoptimized input code in combineShiftToAVG. The NVT can be VT if the larger type was legal and the adds will not overflow, in which case the inputs should be extended. From what I can tell this appears to be valid (if not optimal for this case): https://alive2.llvm.org/ce/z/fRieHR The result has also been changed to getExtOrTrunc in case VT==NVT, which is not handled by SEXT/ZEXT.
1 parent 3db749a commit 771fd1a

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
10641064

10651065
SDLoc DL(Op);
10661066
SDValue ResultAVG =
1067-
DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1068-
DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1069-
return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1070-
ResultAVG);
1067+
DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1068+
DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1069+
return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
10711070
}
10721071

10731072
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,24 @@ define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
13921392
ret <8 x i8> %result
13931393
}
13941394

1395+
define <4 x i16> @ext_via_i19(<4 x i16> %a) {
1396+
; CHECK-LABEL: ext_via_i19:
1397+
; CHECK: // %bb.0:
1398+
; CHECK-NEXT: movi.4s v1, #1
1399+
; CHECK-NEXT: uaddw.4s v0, v1, v0
1400+
; CHECK-NEXT: uhadd.4s v0, v0, v1
1401+
; CHECK-NEXT: xtn.4h v0, v0
1402+
; CHECK-NEXT: ret
1403+
%t3 = zext <4 x i16> %a to <4 x i32>
1404+
%t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1>
1405+
%t5 = trunc <4 x i32> %t4 to <4 x i19>
1406+
%new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1>
1407+
%new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1>
1408+
%last = zext <4 x i19> %new1 to <4 x i32>
1409+
%t6 = trunc <4 x i32> %last to <4 x i16>
1410+
ret <4 x i16> %t6
1411+
}
1412+
13951413
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
13961414
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
13971415
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)

0 commit comments

Comments
 (0)