@@ -1881,6 +1881,141 @@ define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
1881
1881
ret i65 %result
1882
1882
}
1883
1883
1884
; Test: per-lane arithmetic shift right (ashr) of a <4 x i2> vector, using the
; default calling convention (the expected code returns via s_setpc_b64).
; In the expected output the four value lanes appear in v0-v3 and the four
; shift-amount lanes in v4-v7. For every lane the checks expect:
;   1. the amount masked to its low two bits (v_and_b32 with 3),
;   2. the value sign-extended from a 2-bit field (v_bfe_i32 offset 0, width 2),
;   3. an arithmetic shift with the amount as first source operand.
; The GFX6 checks use the 32-bit v_ashrrev_i32; the GFX8, GFX9 and GFX10PLUS
; checks use the 16-bit v_ashrrev_i16. The GFX10PLUS checks keep each masked
; amount in its own register (v4-v7) and place all four shifts at the end,
; whereas the other targets reuse v4 and shift lane by lane.
; NOTE(review): the check lines look like update_llc_test_checks.py output;
; presumably they should be regenerated rather than hand-edited -- confirm.
+ define <4 x i2 > @v_ashr_v4i2 (<4 x i2 > %value , <4 x i2 > %amount ) {
1885
+ ; GFX6-LABEL: v_ashr_v4i2:
1886
+ ; GFX6: ; %bb.0:
1887
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1888
+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v4
1889
+ ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 2
1890
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0
1891
+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v5
1892
+ ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 2
1893
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1
1894
+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v6
1895
+ ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 2
1896
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2
1897
+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v7
1898
+ ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 2
1899
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3
1900
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
1901
+ ;
1902
+ ; GFX8-LABEL: v_ashr_v4i2:
1903
+ ; GFX8: ; %bb.0:
1904
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905
+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v4
1906
+ ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 2
1907
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1908
+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v5
1909
+ ; GFX8-NEXT: v_bfe_i32 v1, v1, 0, 2
1910
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1911
+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v6
1912
+ ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 2
1913
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1914
+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v7
1915
+ ; GFX8-NEXT: v_bfe_i32 v3, v3, 0, 2
1916
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1917
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
1918
+ ;
1919
+ ; GFX9-LABEL: v_ashr_v4i2:
1920
+ ; GFX9: ; %bb.0:
1921
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1922
+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v4
1923
+ ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 2
1924
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1925
+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v5
1926
+ ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 2
1927
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1928
+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v6
1929
+ ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 2
1930
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1931
+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v7
1932
+ ; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 2
1933
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1934
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
1935
+ ;
1936
+ ; GFX10PLUS-LABEL: v_ashr_v4i2:
1937
+ ; GFX10PLUS: ; %bb.0:
1938
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, 3, v4
1940
+ ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
1941
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, 3, v5
1942
+ ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
1943
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, 3, v6
1944
+ ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
1945
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, 3, v7
1946
+ ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2
1947
+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v4, v0
1948
+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v1, v5, v1
1949
+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v2, v6, v2
1950
+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v3, v7, v3
1951
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1952
+ %result = ashr <4 x i2 > %value , %amount
1953
+ ret <4 x i2 > %result
1954
+ }
1955
+
1956
+ ; Arithmetic shifts of an i1 are identity operations: the only shift amount that does not produce poison is 0.
1957
; Test: ashr of a <4 x i1> vector in an amdgpu_ps function whose arguments are
; both marked inreg. The expected output for both check prefixes contains no
; instructions at all between the block label and the shader-epilog comment,
; i.e. the test pins that no shift code is emitted for this input.
+ define amdgpu_ps <4 x i1 > @s_ashr_v4i1 (<4 x i1 > inreg %value , <4 x i1 > inreg %amount ) {
1958
+ ; GCN-LABEL: s_ashr_v4i1:
1959
+ ; GCN: ; %bb.0:
1960
+ ; GCN-NEXT: ; return to shader part epilog
1961
+ ;
1962
+ ; GFX10PLUS-LABEL: s_ashr_v4i1:
1963
+ ; GFX10PLUS: ; %bb.0:
1964
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
1965
+ %result = ashr <4 x i1 > %value , %amount
1966
+ ret <4 x i1 > %result
1967
+ }
1968
+
1969
; Test: ashr of a <4 x i1> vector using the default calling convention.
; The expected output for both check prefixes is only the entry s_waitcnt
; followed by the s_setpc_b64 return, i.e. the test pins that no shift, mask
; or sign-extension instruction is emitted for this input.
+ define <4 x i1 > @v_ashr_v4i1 (<4 x i1 > %value , <4 x i1 > %amount ) {
1970
+ ; GCN-LABEL: v_ashr_v4i1:
1971
+ ; GCN: ; %bb.0:
1972
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1973
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1974
+ ;
1975
+ ; GFX10PLUS-LABEL: v_ashr_v4i1:
1976
+ ; GFX10PLUS: ; %bb.0:
1977
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1979
+ %result = ashr <4 x i1 > %value , %amount
1980
+ ret <4 x i1 > %result
1981
+ }
1982
+
1983
; Test: per-lane ashr of a <4 x i2> vector in an amdgpu_ps function whose
; arguments are both marked inreg; the expected code is entirely scalar.
; In the expected output the value lanes appear in s0-s3 and the shift-amount
; lanes in s4-s7. For every lane the checks expect the amount masked with 3
; (s_and_b32), the value passed through s_bfe_i32 with the constant 0x20000,
; and then s_ashr_i32 with the masked amount as the shift operand.
; NOTE(review): 0x20000 presumably encodes width 2 / offset 0 for s_bfe_i32
; (matching the "0, 2" operands of v_bfe_i32 in the VGPR variant of this
; test) -- confirm against the ISA manual.
; The GFX10PLUS checks process the lanes in pairs (s4/s5 hold two masked
; amounts at a time); the other prefix reuses s4 and finishes one lane before
; starting the next.
+ define amdgpu_ps <4 x i2 > @s_ashr_v4i2 (<4 x i2 > inreg %value , <4 x i2 > inreg %amount ) {
1984
+ ; GCN-LABEL: s_ashr_v4i2:
1985
+ ; GCN: ; %bb.0:
1986
+ ; GCN-NEXT: s_and_b32 s4, s4, 3
1987
+ ; GCN-NEXT: s_bfe_i32 s0, s0, 0x20000
1988
+ ; GCN-NEXT: s_ashr_i32 s0, s0, s4
1989
+ ; GCN-NEXT: s_and_b32 s4, s5, 3
1990
+ ; GCN-NEXT: s_bfe_i32 s1, s1, 0x20000
1991
+ ; GCN-NEXT: s_ashr_i32 s1, s1, s4
1992
+ ; GCN-NEXT: s_and_b32 s4, s6, 3
1993
+ ; GCN-NEXT: s_bfe_i32 s2, s2, 0x20000
1994
+ ; GCN-NEXT: s_ashr_i32 s2, s2, s4
1995
+ ; GCN-NEXT: s_and_b32 s4, s7, 3
1996
+ ; GCN-NEXT: s_bfe_i32 s3, s3, 0x20000
1997
+ ; GCN-NEXT: s_ashr_i32 s3, s3, s4
1998
+ ; GCN-NEXT: ; return to shader part epilog
1999
+ ;
2000
+ ; GFX10PLUS-LABEL: s_ashr_v4i2:
2001
+ ; GFX10PLUS: ; %bb.0:
2002
+ ; GFX10PLUS-NEXT: s_and_b32 s4, s4, 3
2003
+ ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x20000
2004
+ ; GFX10PLUS-NEXT: s_and_b32 s5, s5, 3
2005
+ ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x20000
2006
+ ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4
2007
+ ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5
2008
+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, 3
2009
+ ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x20000
2010
+ ; GFX10PLUS-NEXT: s_and_b32 s5, s7, 3
2011
+ ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x20000
2012
+ ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s4
2013
+ ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s5
2014
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
2015
+ %result = ashr <4 x i2 > %value , %amount
2016
+ ret <4 x i2 > %result
2017
+ }
2018
+
1884
2019
; FIXME: Argument lowering asserts
1885
2020
; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1886
2021
; %result = ashr <2 x i65> %value, %amount
0 commit comments