Skip to content

Commit ee6d62d

Browse files
authored
[AMDGPU] Prevent folding of the negative i32 literals as i64 (#70274)
We can use sign-extended 64-bit literals, but only for signed operands. At the moment we do not know whether an operand is signed. Such an operand will be encoded as its low 32 bits and then either correctly sign-extended or incorrectly zero-extended by the HW.
1 parent 273ceb1 commit ee6d62d

File tree

3 files changed

+145
-4
lines changed

3 files changed

+145
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5611,9 +5611,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
56115611
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
56125612
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
56135613
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
5614-
if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
5615-
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
5616-
return false;
5614+
if (Is64BitOp &&
5615+
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
5616+
if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
5617+
return false;
5618+
5619+
// FIXME: We can use sign extended 64-bit literals, but only for signed
5620+
// operands. At the moment we do not know if an operand is signed.
5621+
// Such an operand will be encoded as its low 32 bits and then either
5622+
// correctly sign extended or incorrectly zero extended by HW.
5623+
if (!Is64BitFPOp && (int32_t)Imm < 0)
5624+
return false;
5625+
}
56175626
}
56185627

56195628
// Handle non-register types that are treated like immediates.

llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ body: |
8484
SI_RETURN_TO_EPILOG %2
8585
...
8686

87+
# FIXME: This could be folded, but we do not know whether the operand of S_AND_B64 is signed or unsigned
88+
# and whether it will be sign- or zero-extended.
89+
8790
---
8891
name: fold_uint_32bit_literal_sgpr
8992
tracksRegLiveness: true
@@ -92,7 +95,8 @@ body: |
9295
9396
; GCN-LABEL: name: fold_uint_32bit_literal_sgpr
9497
; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
95-
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 4294967295, implicit-def $scc
98+
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4294967295
99+
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], [[S_MOV_B64_]], implicit-def $scc
96100
; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
97101
%0:sreg_64 = IMPLICIT_DEF
98102
%1:sreg_64 = S_MOV_B64 4294967295
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
# The constant is 0xffffffff80000000. It is a 64-bit negative constant, but it passes the test
5+
# isInt<32>(). Nonetheless, it is not a legal literal for a binary or unsigned operand and
6+
# cannot be used directly in the shift, as the HW will zero-extend it.
7+
8+
---
9+
name: imm64_shift_int32_const_0xffffffff80000000
10+
body: |
11+
bb.0:
12+
; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff80000000
13+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2147483648
14+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
15+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
16+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744071562067968
17+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
18+
S_ENDPGM 0, implicit %1
19+
20+
...
21+
22+
---
23+
name: imm64_shift_int32_const_0xffffffff
24+
body: |
25+
bb.0:
26+
; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff
27+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
28+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
29+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
30+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
31+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
32+
S_ENDPGM 0, implicit %1
33+
34+
...
35+
36+
---
37+
name: imm64_shift_int32_const_0x80000000
38+
body: |
39+
bb.0:
40+
; GCN-LABEL: name: imm64_shift_int32_const_0x80000000
41+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648
42+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
43+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
44+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648
45+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
46+
S_ENDPGM 0, implicit %1
47+
48+
...
49+
50+
---
51+
name: imm64_shift_int32_const_0x7fffffff
52+
body: |
53+
bb.0:
54+
; GCN-LABEL: name: imm64_shift_int32_const_0x7fffffff
55+
; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 2147483647, 1, implicit-def $scc
56+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
57+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647
58+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
59+
S_ENDPGM 0, implicit %1
60+
61+
...
62+
63+
---
64+
name: imm64_shift_int32_const_0x1ffffffff
65+
body: |
66+
bb.0:
67+
; GCN-LABEL: name: imm64_shift_int32_const_0x1ffffffff
68+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591
69+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc
70+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
71+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591
72+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
73+
S_ENDPGM 0, implicit %1
74+
75+
...
76+
77+
---
78+
name: imm64_shift_int32_const_0xffffffffffffffff
79+
body: |
80+
bb.0:
81+
; GCN-LABEL: name: imm64_shift_int32_const_0xffffffffffffffff
82+
; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 -1, 1, implicit-def $scc
83+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
84+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
85+
%1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc
86+
S_ENDPGM 0, implicit %1
87+
88+
...
89+
90+
---
91+
name: imm64_ashr_int32_const_0xffffffff
92+
body: |
93+
bb.0:
94+
; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffff
95+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
96+
; GCN-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[S_MOV_B]], 1, implicit-def $scc
97+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
98+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
99+
%1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
100+
S_ENDPGM 0, implicit %1
101+
102+
...
103+
104+
---
105+
name: imm64_ashr_int32_const_0x7fffffff
106+
body: |
107+
bb.0:
108+
; GCN-LABEL: name: imm64_ashr_int32_const_0x7fffffff
109+
; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 2147483647, 1, implicit-def $scc
110+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
111+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647
112+
%1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
113+
S_ENDPGM 0, implicit %1
114+
115+
...
116+
117+
---
118+
name: imm64_ashr_int32_const_0xffffffffffffffff
119+
body: |
120+
bb.0:
121+
; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffffffffffff
122+
; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 -1, 1, implicit-def $scc
123+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
124+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
125+
%1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc
126+
S_ENDPGM 0, implicit %1
127+
128+
...

0 commit comments

Comments
 (0)