Skip to content

Commit e1e38d4

Browse files
committed
[X86] Correct the register class for specific mask register constraints in getRegForInlineAsmConstraint when the VT is a scalar type
The default implementation in the base class for TargetLowering::getRegForInlineAsmConstraint doesn't work for mask registers when the VT is a scalar integer type, since the only legal mask types are vXi1. So we end up just getting whatever the first register class that contains the register happens to be. Currently this appears to be VK1, but it's really dependent on the order in which tablegen outputs the register classes. Some code in the caller ends up looking up the type for this register class, finds v1i1, and then generates a copyfromreg from the physical k-register with the v1i1 type. Then it generates an any_extend from v1i1 to the scalar VT, which isn't legal. This bad any_extend sticks around until isel, where it selects a MOVZX32rr8 with a v1i1 input or maybe an i8 input. Not sure, but eventually we pick up a copy from VK1 to GR8 in MachineIR, which isn't supported. This leads to a failure in physical register copying. This patch uses the scalar type to find a VK class of the right size. In the attached test case this will be VK16. This causes a bitcast from vk16 to i16 to be generated instead of an any_extend. This will be properly iseled to a VK16 to GR32 copy and a GR32->GR16 extract_subreg. Fixes PR41678 Differential Revision: https://reviews.llvm.org/D61453 llvm-svn: 359837
1 parent e8a1cde commit e1e38d4

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43890,6 +43890,18 @@ static bool isFRClass(const TargetRegisterClass &RC) {
4389043890
RC.hasSuperClassEq(&X86::VR512RegClass);
4389143891
}
4389243892

43893+
/// Check if \p RC is a mask register class.
43894+
/// I.e., one of the VK* classes (VK1 through VK64) or one of their variants.
43895+
static bool isVKClass(const TargetRegisterClass &RC) {
43896+
// Test RC against each VK class in turn; the first match short-circuits.
return RC.hasSuperClassEq(&X86::VK1RegClass) ||
43897+
RC.hasSuperClassEq(&X86::VK2RegClass) ||
43898+
RC.hasSuperClassEq(&X86::VK4RegClass) ||
43899+
RC.hasSuperClassEq(&X86::VK8RegClass) ||
43900+
RC.hasSuperClassEq(&X86::VK16RegClass) ||
43901+
RC.hasSuperClassEq(&X86::VK32RegClass) ||
43902+
RC.hasSuperClassEq(&X86::VK64RegClass);
43903+
}
43904+
4389343905
std::pair<unsigned, const TargetRegisterClass *>
4389443906
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4389543907
StringRef Constraint,
@@ -44204,6 +44216,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4420444216
Res.first = 0;
4420544217
Res.second = nullptr;
4420644218
}
44219+
} else if (isVKClass(*Class)) {
44220+
if (VT == MVT::i1)
44221+
Res.second = &X86::VK1RegClass;
44222+
else if (VT == MVT::i8)
44223+
Res.second = &X86::VK8RegClass;
44224+
else if (VT == MVT::i16)
44225+
Res.second = &X86::VK16RegClass;
44226+
else if (VT == MVT::i32)
44227+
Res.second = &X86::VK32RegClass;
44228+
else if (VT == MVT::i64)
44229+
Res.second = &X86::VK64RegClass;
44230+
else {
44231+
// Type mismatch and not a clobber: Return an error;
44232+
Res.first = 0;
44233+
Res.second = nullptr;
44234+
}
4420744235
}
4420844236

4420944237
return Res;

llvm/test/CodeGen/X86/pr41678.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -O0 -mtriple=i386-pc-linux-gnu -mattr=avx512f | FileCheck %s
3+
4+
; Regression test for PR41678: an i16 inline-asm output tied to mask register k6 is read back via kmovw and stored.
define void @a() {
5+
; CHECK-LABEL: a:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: subl $2, %esp
8+
; CHECK-NEXT: .cfi_def_cfa_offset 6
9+
; CHECK-NEXT: #APP
10+
; CHECK-NEXT: #NO_APP
11+
; CHECK-NEXT: kmovw %k6, %eax
12+
; CHECK-NEXT: movw %ax, %cx
13+
; CHECK-NEXT: movw %cx, (%esp)
14+
; CHECK-NEXT: addl $2, %esp
15+
; CHECK-NEXT: .cfi_def_cfa_offset 4
16+
; CHECK-NEXT: retl
17+
entry:
18+
%b = alloca i16, align 2
19+
%0 = call i16 asm "", "={k6},~{dirflag},~{fpsr},~{flags}"() #1
20+
store i16 %0, i16* %b, align 2
21+
ret void
22+
}

0 commit comments

Comments
 (0)