Skip to content

Commit bb7e559

Browse files
authored
[AMDGPU] Correct bitshift legality transformation for small vectors (#140940)
Fix for a bug found by the AMD fuzzing project. When legalising bitshifts, the legaliser would try to widen the shift amount of a small vector such as `<4 x i1>` to a single scalar `i16`, because the rule was written without consideration for vector operands. This patch adds a guard (`ValTy.isScalar()`) that prohibits this transformation for vector types and allows other legalisation transformations to step in.
1 parent a58e2d1 commit bb7e559

File tree

2 files changed

+136
-1
lines changed

2 files changed

+136
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1765,7 +1765,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
17651765
// 32-bit amount.
17661766
const LLT ValTy = Query.Types[0];
17671767
const LLT AmountTy = Query.Types[1];
1768-
return ValTy.getSizeInBits() <= 16 &&
1768+
return ValTy.isScalar() && ValTy.getSizeInBits() <= 16 &&
17691769
AmountTy.getSizeInBits() < 16;
17701770
}, changeTo(1, S16));
17711771
Shifts.maxScalarIf(typeIs(0, S16), 1, S16);

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,6 +1881,141 @@ define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
18811881
ret i65 %result
18821882
}
18831883

1884+
define <4 x i2> @v_ashr_v4i2(<4 x i2> %value, <4 x i2> %amount) {
1885+
; GFX6-LABEL: v_ashr_v4i2:
1886+
; GFX6: ; %bb.0:
1887+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1888+
; GFX6-NEXT: v_and_b32_e32 v4, 3, v4
1889+
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 2
1890+
; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0
1891+
; GFX6-NEXT: v_and_b32_e32 v4, 3, v5
1892+
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 2
1893+
; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1
1894+
; GFX6-NEXT: v_and_b32_e32 v4, 3, v6
1895+
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 2
1896+
; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2
1897+
; GFX6-NEXT: v_and_b32_e32 v4, 3, v7
1898+
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 2
1899+
; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3
1900+
; GFX6-NEXT: s_setpc_b64 s[30:31]
1901+
;
1902+
; GFX8-LABEL: v_ashr_v4i2:
1903+
; GFX8: ; %bb.0:
1904+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905+
; GFX8-NEXT: v_and_b32_e32 v4, 3, v4
1906+
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 2
1907+
; GFX8-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1908+
; GFX8-NEXT: v_and_b32_e32 v4, 3, v5
1909+
; GFX8-NEXT: v_bfe_i32 v1, v1, 0, 2
1910+
; GFX8-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1911+
; GFX8-NEXT: v_and_b32_e32 v4, 3, v6
1912+
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 2
1913+
; GFX8-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1914+
; GFX8-NEXT: v_and_b32_e32 v4, 3, v7
1915+
; GFX8-NEXT: v_bfe_i32 v3, v3, 0, 2
1916+
; GFX8-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1917+
; GFX8-NEXT: s_setpc_b64 s[30:31]
1918+
;
1919+
; GFX9-LABEL: v_ashr_v4i2:
1920+
; GFX9: ; %bb.0:
1921+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1922+
; GFX9-NEXT: v_and_b32_e32 v4, 3, v4
1923+
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 2
1924+
; GFX9-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1925+
; GFX9-NEXT: v_and_b32_e32 v4, 3, v5
1926+
; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 2
1927+
; GFX9-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1928+
; GFX9-NEXT: v_and_b32_e32 v4, 3, v6
1929+
; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 2
1930+
; GFX9-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1931+
; GFX9-NEXT: v_and_b32_e32 v4, 3, v7
1932+
; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 2
1933+
; GFX9-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1934+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1935+
;
1936+
; GFX10PLUS-LABEL: v_ashr_v4i2:
1937+
; GFX10PLUS: ; %bb.0:
1938+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939+
; GFX10PLUS-NEXT: v_and_b32_e32 v4, 3, v4
1940+
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
1941+
; GFX10PLUS-NEXT: v_and_b32_e32 v5, 3, v5
1942+
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
1943+
; GFX10PLUS-NEXT: v_and_b32_e32 v6, 3, v6
1944+
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
1945+
; GFX10PLUS-NEXT: v_and_b32_e32 v7, 3, v7
1946+
; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2
1947+
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v4, v0
1948+
; GFX10PLUS-NEXT: v_ashrrev_i16 v1, v5, v1
1949+
; GFX10PLUS-NEXT: v_ashrrev_i16 v2, v6, v2
1950+
; GFX10PLUS-NEXT: v_ashrrev_i16 v3, v7, v3
1951+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1952+
%result = ashr <4 x i2> %value, %amount
1953+
ret <4 x i2> %result
1954+
}
1955+
1956+
; arithmetic shifts of an i1 are identity operations
1957+
define amdgpu_ps <4 x i1> @s_ashr_v4i1(<4 x i1> inreg %value, <4 x i1> inreg %amount) {
1958+
; GCN-LABEL: s_ashr_v4i1:
1959+
; GCN: ; %bb.0:
1960+
; GCN-NEXT: ; return to shader part epilog
1961+
;
1962+
; GFX10PLUS-LABEL: s_ashr_v4i1:
1963+
; GFX10PLUS: ; %bb.0:
1964+
; GFX10PLUS-NEXT: ; return to shader part epilog
1965+
%result = ashr <4 x i1> %value, %amount
1966+
ret <4 x i1> %result
1967+
}
1968+
1969+
define <4 x i1> @v_ashr_v4i1(<4 x i1> %value, <4 x i1> %amount) {
1970+
; GCN-LABEL: v_ashr_v4i1:
1971+
; GCN: ; %bb.0:
1972+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1973+
; GCN-NEXT: s_setpc_b64 s[30:31]
1974+
;
1975+
; GFX10PLUS-LABEL: v_ashr_v4i1:
1976+
; GFX10PLUS: ; %bb.0:
1977+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1979+
%result = ashr <4 x i1> %value, %amount
1980+
ret <4 x i1> %result
1981+
}
1982+
1983+
define amdgpu_ps <4 x i2> @s_ashr_v4i2(<4 x i2> inreg %value, <4 x i2> inreg %amount) {
1984+
; GCN-LABEL: s_ashr_v4i2:
1985+
; GCN: ; %bb.0:
1986+
; GCN-NEXT: s_and_b32 s4, s4, 3
1987+
; GCN-NEXT: s_bfe_i32 s0, s0, 0x20000
1988+
; GCN-NEXT: s_ashr_i32 s0, s0, s4
1989+
; GCN-NEXT: s_and_b32 s4, s5, 3
1990+
; GCN-NEXT: s_bfe_i32 s1, s1, 0x20000
1991+
; GCN-NEXT: s_ashr_i32 s1, s1, s4
1992+
; GCN-NEXT: s_and_b32 s4, s6, 3
1993+
; GCN-NEXT: s_bfe_i32 s2, s2, 0x20000
1994+
; GCN-NEXT: s_ashr_i32 s2, s2, s4
1995+
; GCN-NEXT: s_and_b32 s4, s7, 3
1996+
; GCN-NEXT: s_bfe_i32 s3, s3, 0x20000
1997+
; GCN-NEXT: s_ashr_i32 s3, s3, s4
1998+
; GCN-NEXT: ; return to shader part epilog
1999+
;
2000+
; GFX10PLUS-LABEL: s_ashr_v4i2:
2001+
; GFX10PLUS: ; %bb.0:
2002+
; GFX10PLUS-NEXT: s_and_b32 s4, s4, 3
2003+
; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x20000
2004+
; GFX10PLUS-NEXT: s_and_b32 s5, s5, 3
2005+
; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x20000
2006+
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4
2007+
; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5
2008+
; GFX10PLUS-NEXT: s_and_b32 s4, s6, 3
2009+
; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x20000
2010+
; GFX10PLUS-NEXT: s_and_b32 s5, s7, 3
2011+
; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x20000
2012+
; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s4
2013+
; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s5
2014+
; GFX10PLUS-NEXT: ; return to shader part epilog
2015+
%result = ashr <4 x i2> %value, %amount
2016+
ret <4 x i2> %result
2017+
}
2018+
18842019
; FIXME: Argument lowering asserts
18852020
; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
18862021
; %result = ashr <2 x i65> %value, %amount

0 commit comments

Comments
 (0)