@@ -1710,6 +1710,220 @@ define void @fsub_v32f64(<32 x double>* %a, <32 x double>* %b) #0 {
1710
1710
ret void
1711
1711
}
1712
1712
1713
+ ;
1714
+ ; FABS
1715
+ ;
1716
+
1717
; 64-bit operand (4 x half): SVE must not be used here. The checks expect a
; single unpredicated `fabs v0.4h, v0.4h` followed by `ret`.
define <4 x half> @fabs_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: fabs_v4f16:
; CHECK: fabs v0.4h, v0.4h
; CHECK: ret
  %abs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
  ret <4 x half> %abs
}
1725
+
1726
; 128-bit operand (8 x half): SVE must not be used here. The checks expect a
; single unpredicated `fabs v0.8h, v0.8h` followed by `ret`.
define <8 x half> @fabs_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: fabs_v8f16:
; CHECK: fabs v0.8h, v0.8h
; CHECK: ret
  %abs = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
  ret <8 x half> %abs
}
1734
+
1735
; In-place fabs of the <16 x half> (256-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .h elements with
; vl = min(VBYTES/2, 16), then ld1h / fabs / st1h through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: fabs_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]]
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK: ret
  %vec = load <16 x half>, <16 x half>* %a
  %abs = call <16 x half> @llvm.fabs.v16f16(<16 x half> %vec)
  store <16 x half> %abs, <16 x half>* %a
  ret void
}
1747
+
1748
; In-place fabs of the <32 x half> (512-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .h elements with
; vl = min(VBYTES/2, 32), then ld1h / fabs / st1h through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: fabs_v32f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]]
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK: ret
  %vec = load <32 x half>, <32 x half>* %a
  %abs = call <32 x half> @llvm.fabs.v32f16(<32 x half> %vec)
  store <32 x half> %abs, <32 x half>* %a
  ret void
}
1760
+
1761
; In-place fabs of the <64 x half> (1024-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .h elements with
; vl = min(VBYTES/2, 64), then ld1h / fabs / st1h through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: fabs_v64f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]]
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK: ret
  %vec = load <64 x half>, <64 x half>* %a
  %abs = call <64 x half> @llvm.fabs.v64f16(<64 x half> %vec)
  store <64 x half> %abs, <64 x half>* %a
  ret void
}
1773
+
1774
; In-place fabs of the <128 x half> (2048-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .h elements with
; vl = min(VBYTES/2, 128), then ld1h / fabs / st1h through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: fabs_v128f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]]
; CHECK: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK: ret
  %vec = load <128 x half>, <128 x half>* %a
  %abs = call <128 x half> @llvm.fabs.v128f16(<128 x half> %vec)
  store <128 x half> %abs, <128 x half>* %a
  ret void
}
1786
+
1787
; 64-bit operand (2 x float): SVE must not be used here. The checks expect a
; single unpredicated `fabs v0.2s, v0.2s` followed by `ret`.
define <2 x float> @fabs_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: fabs_v2f32:
; CHECK: fabs v0.2s, v0.2s
; CHECK: ret
  %abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
  ret <2 x float> %abs
}
1795
+
1796
; 128-bit operand (4 x float): SVE must not be used here. The checks expect a
; single unpredicated `fabs v0.4s, v0.4s` followed by `ret`.
define <4 x float> @fabs_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: fabs_v4f32:
; CHECK: fabs v0.4s, v0.4s
; CHECK: ret
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
  ret <4 x float> %abs
}
1804
+
1805
; In-place fabs of the <8 x float> (256-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .s elements with
; vl = min(VBYTES/4, 8), then ld1w / fabs / st1w through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: fabs_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK: ret
  %vec = load <8 x float>, <8 x float>* %a
  %abs = call <8 x float> @llvm.fabs.v8f32(<8 x float> %vec)
  store <8 x float> %abs, <8 x float>* %a
  ret void
}
1817
+
1818
; In-place fabs of the <16 x float> (512-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .s elements with
; vl = min(VBYTES/4, 16), then ld1w / fabs / st1w through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: fabs_v16f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK: ret
  %vec = load <16 x float>, <16 x float>* %a
  %abs = call <16 x float> @llvm.fabs.v16f32(<16 x float> %vec)
  store <16 x float> %abs, <16 x float>* %a
  ret void
}
1830
+
1831
; In-place fabs of the <32 x float> (1024-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .s elements with
; vl = min(VBYTES/4, 32), then ld1w / fabs / st1w through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: fabs_v32f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK: ret
  %vec = load <32 x float>, <32 x float>* %a
  %abs = call <32 x float> @llvm.fabs.v32f32(<32 x float> %vec)
  store <32 x float> %abs, <32 x float>* %a
  ret void
}
1843
+
1844
; In-place fabs of the <64 x float> (2048-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .s elements with
; vl = min(VBYTES/4, 64), then ld1w / fabs / st1w through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: fabs_v64f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
; CHECK: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK: ret
  %vec = load <64 x float>, <64 x float>* %a
  %abs = call <64 x float> @llvm.fabs.v64f32(<64 x float> %vec)
  store <64 x float> %abs, <64 x float>* %a
  ret void
}
1856
+
1857
; 64-bit operand (1 x double): SVE must not be used here. The checks expect
; the scalar form `fabs d0, d0` followed by `ret`.
define <1 x double> @fabs_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: fabs_v1f64:
; CHECK: fabs d0, d0
; CHECK: ret
  %abs = call <1 x double> @llvm.fabs.v1f64(<1 x double> %op)
  ret <1 x double> %abs
}
1865
+
1866
; 128-bit operand (2 x double): SVE must not be used here. The checks expect
; a single unpredicated `fabs v0.2d, v0.2d` followed by `ret`.
define <2 x double> @fabs_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: fabs_v2f64:
; CHECK: fabs v0.2d, v0.2d
; CHECK: ret
  %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
  ret <2 x double> %abs
}
1874
+
1875
; In-place fabs of the <4 x double> (256-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .d elements with
; vl = min(VBYTES/8, 4), then ld1d / fabs / st1d through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: fabs_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]]
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK: ret
  %vec = load <4 x double>, <4 x double>* %a
  %abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %vec)
  store <4 x double> %abs, <4 x double>* %a
  ret void
}
1887
+
1888
; In-place fabs of the <8 x double> (512-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .d elements with
; vl = min(VBYTES/8, 8), then ld1d / fabs / st1d through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: fabs_v8f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]]
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK: ret
  %vec = load <8 x double>, <8 x double>* %a
  %abs = call <8 x double> @llvm.fabs.v8f64(<8 x double> %vec)
  store <8 x double> %abs, <8 x double>* %a
  ret void
}
1900
+
1901
; In-place fabs of the <16 x double> (1024-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .d elements with
; vl = min(VBYTES/8, 16), then ld1d / fabs / st1d through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: fabs_v16f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]]
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK: ret
  %vec = load <16 x double>, <16 x double>* %a
  %abs = call <16 x double> @llvm.fabs.v16f64(<16 x double> %vec)
  store <16 x double> %abs, <16 x double>* %a
  ret void
}
1913
+
1914
; In-place fabs of the <32 x double> (2048-bit) vector stored at %a.
; Expected output is a predicated SVE sequence: ptrue on .d elements with
; vl = min(VBYTES/8, 32), then ld1d / fabs / st1d through [x0].
; VBYTES is not defined in this section; it comes from elsewhere in the test.
define void @fabs_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: fabs_v32f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]]
; CHECK: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK: fabs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK: ret
  %vec = load <32 x double>, <32 x double>* %a
  %abs = call <32 x double> @llvm.fabs.v32f64(<32 x double> %vec)
  store <32 x double> %abs, <32 x double>* %a
  ret void
}
1926
+
1713
1927
; Attribute group #0, referenced by the test functions: enables the "+sve"
; target feature.
attributes #0 = { "target-features"="+sve" }

declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
@@ -1749,3 +1963,22 @@ declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
1749
1963
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
declare <16 x double> @llvm.sqrt.v16f64(<16 x double>)
declare <32 x double> @llvm.sqrt.v32f64(<32 x double>)

; llvm.fabs overloads called by the fabs_* test functions.
; Half-precision element type.
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
declare <64 x half> @llvm.fabs.v64f16(<64 x half>)
declare <128 x half> @llvm.fabs.v128f16(<128 x half>)
; Single-precision element type.
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
declare <32 x float> @llvm.fabs.v32f32(<32 x float>)
declare <64 x float> @llvm.fabs.v64f32(<64 x float>)
; Double-precision element type.
declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
declare <16 x double> @llvm.fabs.v16f64(<16 x double>)
declare <32 x double> @llvm.fabs.v32f64(<32 x double>)
0 commit comments