Skip to content

Commit b0847b0

Browse files
committed
AMDGPU/GlobalISel: Insert freeze when splitting vector G_SEXT_INREG
This transform is broken for undef or poison inputs without a freeze. This is also broken in lots of other places where shifts are split into 32-bit pieces.

Amt < 32 case:
; Broken: https://alive2.llvm.org/ce/z/7bb4vc
; Freezing the low half of the bits makes it correct
; Fixed: https://alive2.llvm.org/ce/z/zJAZFr

define i64 @src(i64 %val) {
  %shl = shl i64 %val, 55
  %shr = ashr i64 %shl, 55
  ret i64 %shr
}

define i64 @tgt(i64 %val) {
  %lo32 = trunc i64 %val to i32
  %shr.half = lshr i64 %val, 32
  %hi32 = trunc i64 %shr.half to i32
  %inreg.0 = shl i32 %lo32, 23
  %new.lo = ashr i32 %inreg.0, 23
  %new.hi = ashr i32 %new.lo, 31
  %zext.lo = zext i32 %new.lo to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}

Amt == 32 case:
; Broken: https://alive2.llvm.org/ce/z/5f4qwQ
; Fixed: https://alive2.llvm.org/ce/z/A2hWWF
; This one times out in Alive2; it works if the argument is made noundef or
; scaled down to a smaller bitwidth.

define i64 @src(i64 %val) {
  %shl = shl i64 %val, 32
  %shr = ashr i64 %shl, 32
  ret i64 %shr
}

define i64 @tgt(i64 %val) {
  %lo32 = trunc i64 %val to i32
  %shr.half = lshr i64 %val, 32
  %hi32 = trunc i64 %shr.half to i32
  %new.hi = ashr i32 %lo32, 31
  %zext.lo = zext i32 %lo32 to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}

Amt > 32 case:
; Correct: https://alive2.llvm.org/ce/z/tvrhPf

define i64 @src(i64 %val) {
  %shl = shl i64 %val, 9
  %shr = ashr i64 %shl, 9
  ret i64 %shr
}

define i64 @tgt(i64 %val) {
  %lo32 = trunc i64 %val to i32
  %lshr = lshr i64 %val, 32
  %hi32 = trunc i64 %lshr to i32
  %inreg.0 = shl i32 %hi32, 9
  %new.hi = ashr i32 %inreg.0, 9
  %zext.lo = zext i32 %lo32 to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}
1 parent 23481bf commit b0847b0

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,17 +2447,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
24472447

24482448
int Amt = MI.getOperand(2).getImm();
24492449
if (Amt <= 32) {
2450+
// Downstream users have expectations for the high bit behavior, so freeze
2451+
// incoming undefined bits.
24502452
if (Amt == 32) {
24512453
// The low bits are unchanged.
2452-
B.buildCopy(DstRegs[0], SrcRegs[0]);
2454+
B.buildFreeze(DstRegs[0], SrcRegs[0]);
24532455
} else {
2456+
auto Freeze = B.buildFreeze(S32, SrcRegs[0]);
24542457
// Extend in the low bits and propagate the sign bit to the high half.
2455-
B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
2458+
B.buildSExtInReg(DstRegs[0], Freeze, Amt);
24562459
}
24572460

24582461
B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
24592462
} else {
24602463
// The low bits are unchanged, and extend in the high bits.
2464+
// No freeze required
24612465
B.buildCopy(DstRegs[0], SrcRegs[0]);
24622466
B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
24632467
}

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ body: |
135135
; CHECK-NEXT: {{ $}}
136136
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
137137
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
138-
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
138+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
139+
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 1
139140
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
140141
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
141142
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
@@ -159,7 +160,8 @@ body: |
159160
; CHECK-NEXT: {{ $}}
160161
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
161162
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
162-
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
163+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
164+
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 31
163165
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
164166
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
165167
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
@@ -183,10 +185,10 @@ body: |
183185
; CHECK-NEXT: {{ $}}
184186
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
185187
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
186-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
188+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
187189
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
188-
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
189-
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
190+
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[FREEZE]], [[C]](s32)
191+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[FREEZE]](s32), [[ASHR]](s32)
190192
; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64)
191193
%0:_(s64) = COPY $vgpr0_vgpr1
192194
%1:_(s64) = G_SEXT_INREG %0, 32

0 commit comments

Comments
 (0)