Skip to content

Commit 3f6753b

Browse files
committed
Add tests
1 parent c72a751 commit 3f6753b

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

llvm/test/CodeGen/AArch64/sve-hadd.ll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,3 +1341,72 @@ entry:
13411341
%avg = ashr <vscale x 2 x i64> %add, splat (i64 1)
13421342
ret <vscale x 2 x i64> %avg
13431343
}
1344+
1345+
; Unsigned floor-average of two masked i8 loads, widened to i16 before the store.
;
; Both inputs are masked-loaded from %p1 and %p2 as <vscale x 8 x i8> under
; %mask with a zeroinitializer passthru. The average is formed entirely in i8
; as (a & b) + ((a ^ b) >> 1), which equals floor((a + b) / 2) without needing
; a wider intermediate. The i8 result is then zero-extended to i16 and
; masked-stored back through %p1 under the same %mask.
;
; The two expected-assembly blocks below record the current lowering: each
; input is loaded with ld1b into .h lanes and then masked with #0xff; one
; block uses add + lsr, the other a ptrue-predicated uhadd.
; NOTE(review): the expected-assembly lines look tool-generated; presumably
; they should be regenerated rather than edited by hand - confirm against the
; header of this test file, which is not visible here.
define void @zext_mload_avgflooru(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
1346+
; SVE-LABEL: zext_mload_avgflooru:
1347+
; SVE: // %bb.0:
1348+
; SVE-NEXT: ld1b { z0.h }, p0/z, [x0]
1349+
; SVE-NEXT: ld1b { z1.h }, p0/z, [x1]
1350+
; SVE-NEXT: and z0.h, z0.h, #0xff
1351+
; SVE-NEXT: and z1.h, z1.h, #0xff
1352+
; SVE-NEXT: add z0.h, z0.h, z1.h
1353+
; SVE-NEXT: lsr z0.h, z0.h, #1
1354+
; SVE-NEXT: st1h { z0.h }, p0, [x0]
1355+
; SVE-NEXT: ret
1356+
;
1357+
; SVE2-LABEL: zext_mload_avgflooru:
1358+
; SVE2: // %bb.0:
1359+
; SVE2-NEXT: ld1b { z0.h }, p0/z, [x0]
1360+
; SVE2-NEXT: ld1b { z1.h }, p0/z, [x1]
1361+
; SVE2-NEXT: ptrue p1.h
1362+
; SVE2-NEXT: and z0.h, z0.h, #0xff
1363+
; SVE2-NEXT: and z1.h, z1.h, #0xff
1364+
; SVE2-NEXT: uhadd z0.h, p1/m, z0.h, z1.h
1365+
; SVE2-NEXT: st1h { z0.h }, p0, [x0]
1366+
; SVE2-NEXT: ret
1367+
; Inactive lanes of both loads read as zero (zeroinitializer passthru).
%ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
1368+
%ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
1369+
; Overflow-free floor average: the shared bits plus half of the differing bits.
%and = and <vscale x 8 x i8> %ld1, %ld2
1370+
%xor = xor <vscale x 8 x i8> %ld1, %ld2
1371+
%shift = lshr <vscale x 8 x i8> %xor, splat(i8 1)
1372+
%avg = add <vscale x 8 x i8> %and, %shift
1373+
; The widening happens after the average, so the stored element type is i16.
%avgext = zext <vscale x 8 x i8> %avg to <vscale x 8 x i16>
1374+
call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %avgext, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
1375+
ret void
1376+
}
1377+
1378+
; Unsigned rounding-up average of two masked i8 loads, computed in i16 and
; narrowed back to i8 before the store.
;
; Both inputs are masked-loaded from %p1 and %p2 as <vscale x 8 x i8> under
; %mask with a zeroinitializer passthru, then zero-extended to i16. The
; average is formed as (a + 1 + b) >> 1 in i16, truncated to i8, and
; masked-stored back through %p1 under the same %mask.
;
; The two expected-assembly blocks below record the current lowering: each
; input is loaded with ld1b into .h lanes and then masked with #0xff; one
; block uses eor with an all-ones vector, sub and lsr, the other a
; ptrue-predicated urhadd. Both finish with a truncating st1b.
; NOTE(review): the expected-assembly lines look tool-generated; presumably
; they should be regenerated rather than edited by hand - confirm against the
; header of this test file, which is not visible here.
define void @zext_mload_avgceilu(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
1379+
; SVE-LABEL: zext_mload_avgceilu:
1380+
; SVE: // %bb.0:
1381+
; SVE-NEXT: ld1b { z0.h }, p0/z, [x0]
1382+
; SVE-NEXT: ld1b { z1.h }, p0/z, [x1]
1383+
; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
1384+
; SVE-NEXT: and z0.h, z0.h, #0xff
1385+
; SVE-NEXT: and z1.h, z1.h, #0xff
1386+
; SVE-NEXT: eor z0.d, z0.d, z2.d
1387+
; SVE-NEXT: sub z0.h, z1.h, z0.h
1388+
; SVE-NEXT: lsr z0.h, z0.h, #1
1389+
; SVE-NEXT: st1b { z0.h }, p0, [x0]
1390+
; SVE-NEXT: ret
1391+
;
1392+
; SVE2-LABEL: zext_mload_avgceilu:
1393+
; SVE2: // %bb.0:
1394+
; SVE2-NEXT: ld1b { z0.h }, p0/z, [x0]
1395+
; SVE2-NEXT: ld1b { z1.h }, p0/z, [x1]
1396+
; SVE2-NEXT: ptrue p1.h
1397+
; SVE2-NEXT: and z0.h, z0.h, #0xff
1398+
; SVE2-NEXT: and z1.h, z1.h, #0xff
1399+
; SVE2-NEXT: urhadd z0.h, p1/m, z0.h, z1.h
1400+
; SVE2-NEXT: st1b { z0.h }, p0, [x0]
1401+
; SVE2-NEXT: ret
1402+
; Inactive lanes of both loads read as zero (zeroinitializer passthru).
%ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
1403+
%ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
1404+
; Widen first so the +1 rounding term and the sum are formed in i16.
%zext1 = zext <vscale x 8 x i8> %ld1 to <vscale x 8 x i16>
1405+
%zext2 = zext <vscale x 8 x i8> %ld2 to <vscale x 8 x i16>
1406+
; nuw/nsw hold because both operands are zero-extended i8 values: the largest
; possible sum is 255 + 1 + 255 = 511, well inside the i16 range.
%add1 = add nuw nsw <vscale x 8 x i16> %zext1, splat(i16 1)
1407+
%add2 = add nuw nsw <vscale x 8 x i16> %add1, %zext2
1408+
%shift = lshr <vscale x 8 x i16> %add2, splat(i16 1)
1409+
; After the shift the value is at most 255, so the truncation drops no set bits.
%trunc = trunc <vscale x 8 x i16> %shift to <vscale x 8 x i8>
1410+
call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
1411+
ret void
1412+
}

0 commit comments

Comments
 (0)