Skip to content

Commit d650fcd

Browse files
authored
[DAG] SimplifyDemandedVectorElts - add ISD::AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes (#86284)
Fixes #84768
1 parent 250b467 commit d650fcd

File tree

3 files changed: 60 additions (+60) and 7 deletions (-7).

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3524,6 +3524,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
35243524
}
35253525
[[fallthrough]];
35263526
}
3527+
case ISD::AVGCEILS:
3528+
case ISD::AVGCEILU:
3529+
case ISD::AVGFLOORS:
3530+
case ISD::AVGFLOORU:
35273531
case ISD::OR:
35283532
case ISD::XOR:
35293533
case ISD::SUB:

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 53 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -903,6 +903,58 @@ define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
903903
ret <8 x i16> %res
904904
}
905905

906+
define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
907+
; CHECK-LABEL: shadd_demandedelts:
908+
; CHECK: // %bb.0:
909+
; CHECK-NEXT: dup v0.8h, v0.h[0]
910+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
911+
; CHECK-NEXT: dup v0.8h, v0.h[0]
912+
; CHECK-NEXT: ret
913+
%s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
914+
%op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
915+
%r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
916+
ret <8 x i16> %r0
917+
}
918+
919+
define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
920+
; CHECK-LABEL: srhadd_demandedelts:
921+
; CHECK: // %bb.0:
922+
; CHECK-NEXT: dup v0.8h, v0.h[0]
923+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
924+
; CHECK-NEXT: dup v0.8h, v0.h[0]
925+
; CHECK-NEXT: ret
926+
%s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
927+
%op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
928+
%r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
929+
ret <8 x i16> %r0
930+
}
931+
932+
define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
933+
; CHECK-LABEL: uhadd_demandedelts:
934+
; CHECK: // %bb.0:
935+
; CHECK-NEXT: dup v0.8h, v0.h[0]
936+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
937+
; CHECK-NEXT: dup v0.8h, v0.h[0]
938+
; CHECK-NEXT: ret
939+
%s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
940+
%op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
941+
%r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
942+
ret <8 x i16> %r0
943+
}
944+
945+
define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
946+
; CHECK-LABEL: urhadd_demandedelts:
947+
; CHECK: // %bb.0:
948+
; CHECK-NEXT: dup v0.8h, v0.h[0]
949+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
950+
; CHECK-NEXT: dup v0.8h, v0.h[0]
951+
; CHECK-NEXT: ret
952+
%s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
953+
%op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
954+
%r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
955+
ret <8 x i16> %r0
956+
}
957+
906958
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
907959
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
908960
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -927,4 +979,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
927979
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
928980
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
929981
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
930-
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
982+
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)

llvm/test/CodeGen/X86/combine-pavg.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,25 +84,22 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16
8484
define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
8585
; SSE-LABEL: combine_pavgw_demandedelts:
8686
; SSE: # %bb.0:
87-
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
88-
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
8987
; SSE-NEXT: pavgw %xmm1, %xmm0
88+
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
9089
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
9190
; SSE-NEXT: retq
9291
;
9392
; AVX1-LABEL: combine_pavgw_demandedelts:
9493
; AVX1: # %bb.0:
95-
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
96-
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
9794
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
95+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
9896
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
9997
; AVX1-NEXT: retq
10098
;
10199
; AVX2-LABEL: combine_pavgw_demandedelts:
102100
; AVX2: # %bb.0:
103-
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
104-
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
105101
; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0
102+
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
106103
; AVX2-NEXT: retq
107104
%s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
108105
%avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)

0 commit comments

Comments (0)