Commit b9dd602
[DAGCombiner] Remove a hasOneUse check in visitAND (#115142)
For some reason there was a hasOneUse check on the splat for the second operand, and it's not obvious to me why it was needed. The check blocks optimisations when lowering nodes like AVGFLOORU and AVGCEILU. In a follow-on patch I also plan to further improve the generated code for AVGCEILU by teaching computeKnownBits about zero-extending masked loads.
Parent: ff07df6 · Commit: b9dd602
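For background (not part of the commit message): AVGFLOORU and AVGCEILU are the unsigned floor- and ceiling-average nodes. Without a halving-add instruction they are expanded using the overflow-free identities floor((a+b)/2) = (a & b) + ((a ^ b) >> 1) and ceil((a+b)/2) = (a | b) - ((a ^ b) >> 1), which is the shape visible in the plain-SVE output of the new tests below; SVE2 provides uhadd/urhadd directly. A minimal scalar LLVM IR sketch of the two identities (illustrative only; these functions are not part of the patch):

define i8 @avgflooru_scalar(i8 %a, i8 %b) {
  ; shared bits plus half of the differing bits == floor((a + b) / 2)
  %and = and i8 %a, %b
  %xor = xor i8 %a, %b
  %half = lshr i8 %xor, 1
  %avg = add i8 %and, %half
  ret i8 %avg
}

define i8 @avgceilu_scalar(i8 %a, i8 %b) {
  ; union of the bits minus half of the differing bits == ceil((a + b) / 2)
  %or = or i8 %a, %b
  %xor = xor i8 %a, %b
  %half = lshr i8 %xor, 1
  %avg = sub i8 %or, %half
  ret i8 %avg
}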

2 files changed (+64, -2 lines)


llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
@@ -7096,8 +7096,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
   auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
   ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
-  if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
-      N1.hasOneUse()) {
+  if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
     EVT LoadVT = MLoad->getMemoryVT();
     EVT ExtVT = VT;
     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {

llvm/test/CodeGen/AArch64/sve-hadd.ll

Lines changed: 63 additions & 0 deletions
@@ -1341,3 +1341,66 @@ entry:
   %avg = ashr <vscale x 2 x i64> %add, splat (i64 1)
   ret <vscale x 2 x i64> %avg
 }
+
+define void @zext_mload_avgflooru(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
+; SVE-LABEL: zext_mload_avgflooru:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; SVE-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; SVE-NEXT:    eor z2.d, z0.d, z1.d
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.h, z2.h, #1
+; SVE-NEXT:    add z0.h, z0.h, z1.h
+; SVE-NEXT:    st1h { z0.h }, p0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_mload_avgflooru:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; SVE2-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; SVE2-NEXT:    ptrue p1.h
+; SVE2-NEXT:    uhadd z0.h, p1/m, z0.h, z1.h
+; SVE2-NEXT:    st1h { z0.h }, p0, [x0]
+; SVE2-NEXT:    ret
+  %ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %and = and <vscale x 8 x i8> %ld1, %ld2
+  %xor = xor <vscale x 8 x i8> %ld1, %ld2
+  %shift = lshr <vscale x 8 x i8> %xor, splat(i8 1)
+  %avg = add <vscale x 8 x i8> %and, %shift
+  %avgext = zext <vscale x 8 x i8> %avg to <vscale x 8 x i16>
+  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %avgext, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+define void @zext_mload_avgceilu(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
+; SVE-LABEL: zext_mload_avgceilu:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; SVE-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; SVE-NEXT:    eor z2.d, z0.d, z1.d
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.h, z2.h, #1
+; SVE-NEXT:    sub z0.h, z0.h, z1.h
+; SVE-NEXT:    st1b { z0.h }, p0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_mload_avgceilu:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; SVE2-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; SVE2-NEXT:    ptrue p1.h
+; SVE2-NEXT:    urhadd z0.h, p1/m, z0.h, z1.h
+; SVE2-NEXT:    st1b { z0.h }, p0, [x0]
+; SVE2-NEXT:    ret
+  %ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %zext1 = zext <vscale x 8 x i8> %ld1 to <vscale x 8 x i16>
+  %zext2 = zext <vscale x 8 x i8> %ld2 to <vscale x 8 x i16>
+  %add1 = add nuw nsw <vscale x 8 x i16> %zext1, splat(i16 1)
+  %add2 = add nuw nsw <vscale x 8 x i16> %add1, %zext2
+  %shift = lshr <vscale x 8 x i16> %add2, splat(i16 1)
+  %trunc = trunc <vscale x 8 x i16> %shift to <vscale x 8 x i8>
+  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
+  ret void
+}
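The masked loads are the part of these tests that exercises the combine changed above: the i8 elements arrive via extending ld1b loads, and folding the masking AND into a zero-extending masked load is presumably what lets the averaging patterns be matched cleanly (uhadd/urhadd on SVE2). For comparison, a hypothetical sketch of the same widened ceiling-average shape with ordinary, unmasked loads, which should not depend on the masked-load fold at all; this function is not part of the commit:

define void @zext_load_avgceilu(ptr %p1, ptr %p2) {
  ; same widened rounding-average pattern as the test above,
  ; but with plain loads and stores instead of masked ones
  %v1 = load <vscale x 8 x i8>, ptr %p1
  %v2 = load <vscale x 8 x i8>, ptr %p2
  %zext1 = zext <vscale x 8 x i8> %v1 to <vscale x 8 x i16>
  %zext2 = zext <vscale x 8 x i8> %v2 to <vscale x 8 x i16>
  %add1 = add nuw nsw <vscale x 8 x i16> %zext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 8 x i16> %add1, %zext2
  %shift = lshr <vscale x 8 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 8 x i16> %shift to <vscale x 8 x i8>
  store <vscale x 8 x i8> %trunc, ptr %p1
  ret void
}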
