Skip to content

Commit 0d6482a

Browse files
[llvm][AArch64][SVE] Lower fixed length vector fabs
Seemingly straightforward. Differential Revision: https://reviews.llvm.org/D98434
1 parent 16c30c3 commit 0d6482a

File tree

2 files changed

+234
-0
lines changed

2 files changed

+234
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1397,6 +1397,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
13971397
setOperationAction(ISD::CTLZ, VT, Custom);
13981398
setOperationAction(ISD::CTPOP, VT, Custom);
13991399
setOperationAction(ISD::CTTZ, VT, Custom);
1400+
setOperationAction(ISD::FABS, VT, Custom);
14001401
setOperationAction(ISD::FADD, VT, Custom);
14011402
setOperationAction(ISD::FCEIL, VT, Custom);
14021403
setOperationAction(ISD::FDIV, VT, Custom);

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll

Lines changed: 233 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1710,6 +1710,220 @@ define void @fsub_v32f64(<32 x double>* %a, <32 x double>* %b) #0 {
17101710
ret void
17111711
}
17121712

1713+
;
1714+
; FABS
1715+
;
1716+
1717+
; Don't use SVE for 64-bit vectors.
1718+
define <4 x half> @fabs_v4f16(<4 x half> %op) #0 {
1719+
; CHECK-LABEL: fabs_v4f16:
1720+
; CHECK: fabs v0.4h, v0.4h
1721+
; CHECK: ret
1722+
%res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
1723+
ret <4 x half> %res
1724+
}
1725+
1726+
; Don't use SVE for 128-bit vectors.
1727+
define <8 x half> @fabs_v8f16(<8 x half> %op) #0 {
1728+
; CHECK-LABEL: fabs_v8f16:
1729+
; CHECK: fabs v0.8h, v0.8h
1730+
; CHECK: ret
1731+
%res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
1732+
ret <8 x half> %res
1733+
}
1734+
1735+
define void @fabs_v16f16(<16 x half>* %a) #0 {
1736+
; CHECK-LABEL: fabs_v16f16:
1737+
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]]
1738+
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1739+
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1740+
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
1741+
; CHECK: ret
1742+
%op = load <16 x half>, <16 x half>* %a
1743+
%res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
1744+
store <16 x half> %res, <16 x half>* %a
1745+
ret void
1746+
}
1747+
1748+
define void @fabs_v32f16(<32 x half>* %a) #0 {
1749+
; CHECK-LABEL: fabs_v32f16:
1750+
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]]
1751+
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1752+
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1753+
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
1754+
; CHECK: ret
1755+
%op = load <32 x half>, <32 x half>* %a
1756+
%res = call <32 x half> @llvm.fabs.v32f16(<32 x half> %op)
1757+
store <32 x half> %res, <32 x half>* %a
1758+
ret void
1759+
}
1760+
1761+
define void @fabs_v64f16(<64 x half>* %a) #0 {
1762+
; CHECK-LABEL: fabs_v64f16:
1763+
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]]
1764+
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1765+
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1766+
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
1767+
; CHECK: ret
1768+
%op = load <64 x half>, <64 x half>* %a
1769+
%res = call <64 x half> @llvm.fabs.v64f16(<64 x half> %op)
1770+
store <64 x half> %res, <64 x half>* %a
1771+
ret void
1772+
}
1773+
1774+
define void @fabs_v128f16(<128 x half>* %a) #0 {
1775+
; CHECK-LABEL: fabs_v128f16:
1776+
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]]
1777+
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1778+
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1779+
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
1780+
; CHECK: ret
1781+
%op = load <128 x half>, <128 x half>* %a
1782+
%res = call <128 x half> @llvm.fabs.v128f16(<128 x half> %op)
1783+
store <128 x half> %res, <128 x half>* %a
1784+
ret void
1785+
}
1786+
1787+
; Don't use SVE for 64-bit vectors.
1788+
define <2 x float> @fabs_v2f32(<2 x float> %op) #0 {
1789+
; CHECK-LABEL: fabs_v2f32:
1790+
; CHECK: fabs v0.2s, v0.2s
1791+
; CHECK: ret
1792+
%res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
1793+
ret <2 x float> %res
1794+
}
1795+
1796+
; Don't use SVE for 128-bit vectors.
1797+
define <4 x float> @fabs_v4f32(<4 x float> %op) #0 {
1798+
; CHECK-LABEL: fabs_v4f32:
1799+
; CHECK: fabs v0.4s, v0.4s
1800+
; CHECK: ret
1801+
%res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
1802+
ret <4 x float> %res
1803+
}
1804+
1805+
define void @fabs_v8f32(<8 x float>* %a) #0 {
1806+
; CHECK-LABEL: fabs_v8f32:
1807+
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
1808+
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1809+
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1810+
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
1811+
; CHECK: ret
1812+
%op = load <8 x float>, <8 x float>* %a
1813+
%res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
1814+
store <8 x float> %res, <8 x float>* %a
1815+
ret void
1816+
}
1817+
1818+
define void @fabs_v16f32(<16 x float>* %a) #0 {
1819+
; CHECK-LABEL: fabs_v16f32:
1820+
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
1821+
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1822+
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1823+
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
1824+
; CHECK: ret
1825+
%op = load <16 x float>, <16 x float>* %a
1826+
%res = call <16 x float> @llvm.fabs.v16f32(<16 x float> %op)
1827+
store <16 x float> %res, <16 x float>* %a
1828+
ret void
1829+
}
1830+
1831+
define void @fabs_v32f32(<32 x float>* %a) #0 {
1832+
; CHECK-LABEL: fabs_v32f32:
1833+
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
1834+
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1835+
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1836+
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
1837+
; CHECK: ret
1838+
%op = load <32 x float>, <32 x float>* %a
1839+
%res = call <32 x float> @llvm.fabs.v32f32(<32 x float> %op)
1840+
store <32 x float> %res, <32 x float>* %a
1841+
ret void
1842+
}
1843+
1844+
define void @fabs_v64f32(<64 x float>* %a) #0 {
1845+
; CHECK-LABEL: fabs_v64f32:
1846+
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
1847+
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1848+
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1849+
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
1850+
; CHECK: ret
1851+
%op = load <64 x float>, <64 x float>* %a
1852+
%res = call <64 x float> @llvm.fabs.v64f32(<64 x float> %op)
1853+
store <64 x float> %res, <64 x float>* %a
1854+
ret void
1855+
}
1856+
1857+
; Don't use SVE for 64-bit vectors.
1858+
define <1 x double> @fabs_v1f64(<1 x double> %op) #0 {
1859+
; CHECK-LABEL: fabs_v1f64:
1860+
; CHECK: fabs d0, d0
1861+
; CHECK: ret
1862+
%res = call <1 x double> @llvm.fabs.v1f64(<1 x double> %op)
1863+
ret <1 x double> %res
1864+
}
1865+
1866+
; Don't use SVE for 128-bit vectors.
1867+
define <2 x double> @fabs_v2f64(<2 x double> %op) #0 {
1868+
; CHECK-LABEL: fabs_v2f64:
1869+
; CHECK: fabs v0.2d, v0.2d
1870+
; CHECK: ret
1871+
%res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
1872+
ret <2 x double> %res
1873+
}
1874+
1875+
define void @fabs_v4f64(<4 x double>* %a) #0 {
1876+
; CHECK-LABEL: fabs_v4f64:
1877+
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]]
1878+
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1879+
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1880+
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
1881+
; CHECK: ret
1882+
%op = load <4 x double>, <4 x double>* %a
1883+
%res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
1884+
store <4 x double> %res, <4 x double>* %a
1885+
ret void
1886+
}
1887+
1888+
define void @fabs_v8f64(<8 x double>* %a) #0 {
1889+
; CHECK-LABEL: fabs_v8f64:
1890+
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]]
1891+
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1892+
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1893+
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
1894+
; CHECK: ret
1895+
%op = load <8 x double>, <8 x double>* %a
1896+
%res = call <8 x double> @llvm.fabs.v8f64(<8 x double> %op)
1897+
store <8 x double> %res, <8 x double>* %a
1898+
ret void
1899+
}
1900+
1901+
define void @fabs_v16f64(<16 x double>* %a) #0 {
1902+
; CHECK-LABEL: fabs_v16f64:
1903+
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]]
1904+
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1905+
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1906+
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
1907+
; CHECK: ret
1908+
%op = load <16 x double>, <16 x double>* %a
1909+
%res = call <16 x double> @llvm.fabs.v16f64(<16 x double> %op)
1910+
store <16 x double> %res, <16 x double>* %a
1911+
ret void
1912+
}
1913+
1914+
define void @fabs_v32f64(<32 x double>* %a) #0 {
1915+
; CHECK-LABEL: fabs_v32f64:
1916+
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]]
1917+
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1918+
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1919+
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
1920+
; CHECK: ret
1921+
%op = load <32 x double>, <32 x double>* %a
1922+
%res = call <32 x double> @llvm.fabs.v32f64(<32 x double> %op)
1923+
store <32 x double> %res, <32 x double>* %a
1924+
ret void
1925+
}
1926+
17131927
attributes #0 = { "target-features"="+sve" }
17141928

17151929
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
@@ -1749,3 +1963,22 @@ declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
17491963
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
17501964
declare <16 x double> @llvm.sqrt.v16f64(<16 x double>)
17511965
declare <32 x double> @llvm.sqrt.v32f64(<32 x double>)
1966+
1967+
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
1968+
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
1969+
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
1970+
declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
1971+
declare <64 x half> @llvm.fabs.v64f16(<64 x half>)
1972+
declare <128 x half> @llvm.fabs.v128f16(<128 x half>)
1973+
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
1974+
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
1975+
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
1976+
declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
1977+
declare <32 x float> @llvm.fabs.v32f32(<32 x float>)
1978+
declare <64 x float> @llvm.fabs.v64f32(<64 x float>)
1979+
declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
1980+
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
1981+
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
1982+
declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
1983+
declare <16 x double> @llvm.fabs.v16f64(<16 x double>)
1984+
declare <32 x double> @llvm.fabs.v32f64(<32 x double>)

0 commit comments

Comments
 (0)