Skip to content

Commit 2f68d6d

Browse files
committed
[DAG] Extend input types if needed in combineShiftToAVG.
This attempts to fix #76734, which is a crash caused by invalid TRUNC node types arising from unoptimized input code in combineShiftToAVG. The NVT can be VT if the larger type was legal and the adds will not overflow, in which case the inputs should be extended. From what I can tell this appears to be valid, if not optimal: https://alive2.llvm.org/ce/z/fRieHR. The result has also been changed to use getExtOrTrunc in case VT==NVT, which is not handled by SEXT/ZEXT.
1 parent d659bd1 commit 2f68d6d

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
10641064

10651065
SDLoc DL(Op);
10661066
SDValue ResultAVG =
1067-
DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1068-
DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1069-
return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1070-
ResultAVG);
1067+
DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1068+
DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1069+
return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
10711070
}
10721071

10731072
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,24 @@ define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
13921392
ret <8 x i8> %result
13931393
}
13941394

1395+
define <4 x i16> @ext_via_i19(<4 x i16> %a) {
1396+
; CHECK-LABEL: ext_via_i19:
1397+
; CHECK: // %bb.0:
1398+
; CHECK-NEXT: movi.4s v1, #1
1399+
; CHECK-NEXT: uaddw.4s v0, v1, v0
1400+
; CHECK-NEXT: uhadd.4s v0, v0, v1
1401+
; CHECK-NEXT: xtn.4h v0, v0
1402+
; CHECK-NEXT: ret
1403+
%t3 = zext <4 x i16> %a to <4 x i32>
1404+
%t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1>
1405+
%t5 = trunc <4 x i32> %t4 to <4 x i19>
1406+
%new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1>
1407+
%new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1>
1408+
%last = zext <4 x i19> %new1 to <4 x i32>
1409+
%t6 = trunc <4 x i32> %last to <4 x i16>
1410+
ret <4 x i16> %t6
1411+
}
1412+
13951413
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
13961414
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
13971415
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)

0 commit comments

Comments
 (0)