Skip to content

Commit 3bb5171

Browse files
committed
[DAGCombiner] Remove a hasOneUse check in visitAND
For some reason there was a hasOneUse check on the splat used as the second operand, and it's not obvious to me why it was needed. The check blocks optimisations when lowering nodes such as AVGFLOORU and AVGCEILU. In a follow-on patch I also plan to improve the generated code for AVGCEILU further by teaching computeKnownBits about zero-extending masked loads.
1 parent 5f7730e commit 3bb5171

File tree

2 files changed

+8
-11
lines changed

2 files changed

+8
-11
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7095,8 +7095,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
70957095
// fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
70967096
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
70977097
ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7098-
if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
7099-
N1.hasOneUse()) {
7098+
if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
71007099
EVT LoadVT = MLoad->getMemoryVT();
71017100
EVT ExtVT = VT;
71027101
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {

llvm/test/CodeGen/AArch64/avg.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ define void @zext_mload_avgflooru(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
2222
; CHECK: // %bb.0:
2323
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
2424
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x1]
25-
; CHECK-NEXT: and z0.h, z0.h, #0xff
26-
; CHECK-NEXT: and z1.h, z1.h, #0xff
25+
; CHECK-NEXT: eor z2.d, z0.d, z1.d
26+
; CHECK-NEXT: and z0.d, z0.d, z1.d
27+
; CHECK-NEXT: lsr z1.h, z2.h, #1
2728
; CHECK-NEXT: add z0.h, z0.h, z1.h
28-
; CHECK-NEXT: lsr z0.h, z0.h, #1
2929
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
3030
; CHECK-NEXT: ret
3131
%ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
@@ -78,12 +78,10 @@ define void @zext_mload_avgceilu(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
7878
; CHECK: // %bb.0:
7979
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
8080
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x1]
81-
; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
82-
; CHECK-NEXT: and z0.h, z0.h, #0xff
83-
; CHECK-NEXT: and z1.h, z1.h, #0xff
84-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
85-
; CHECK-NEXT: sub z0.h, z1.h, z0.h
86-
; CHECK-NEXT: lsr z0.h, z0.h, #1
81+
; CHECK-NEXT: eor z2.d, z0.d, z1.d
82+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
83+
; CHECK-NEXT: lsr z1.h, z2.h, #1
84+
; CHECK-NEXT: sub z0.h, z0.h, z1.h
8785
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
8886
; CHECK-NEXT: ret
8987
%ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)

0 commit comments

Comments
 (0)