-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][GISel] Add really basic support for FP regbank selection for G_LOAD/G_STORE. #70896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,6 +109,56 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) { | |
return &RISCV::ValueMappings[Idx]; | ||
} | ||
|
||
// TODO: Make this more like AArch64? | ||
static bool onlyUsesFP(const MachineInstr &MI) { | ||
switch (MI.getOpcode()) { | ||
case TargetOpcode::G_FADD: | ||
case TargetOpcode::G_FSUB: | ||
case TargetOpcode::G_FMUL: | ||
case TargetOpcode::G_FDIV: | ||
case TargetOpcode::G_FNEG: | ||
case TargetOpcode::G_FABS: | ||
case TargetOpcode::G_FSQRT: | ||
case TargetOpcode::G_FMAXNUM: | ||
case TargetOpcode::G_FMINNUM: | ||
case TargetOpcode::G_FPEXT: | ||
case TargetOpcode::G_FPTRUNC: | ||
case TargetOpcode::G_FCMP: | ||
case TargetOpcode::G_FPTOSI: | ||
case TargetOpcode::G_FPTOUI: | ||
return true; | ||
default: | ||
break; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
// TODO: Make this more like AArch64? | ||
static bool onlyDefinesFP(const MachineInstr &MI) { | ||
switch (MI.getOpcode()) { | ||
case TargetOpcode::G_FADD: | ||
case TargetOpcode::G_FSUB: | ||
case TargetOpcode::G_FMUL: | ||
case TargetOpcode::G_FDIV: | ||
case TargetOpcode::G_FNEG: | ||
case TargetOpcode::G_FABS: | ||
case TargetOpcode::G_FSQRT: | ||
case TargetOpcode::G_FMAXNUM: | ||
case TargetOpcode::G_FMINNUM: | ||
case TargetOpcode::G_FPEXT: | ||
case TargetOpcode::G_FPTRUNC: | ||
case TargetOpcode::G_SITOFP: | ||
case TargetOpcode::G_UITOFP: | ||
case TargetOpcode::G_FCONSTANT: | ||
return true; | ||
default: | ||
break; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
const RegisterBankInfo::InstructionMapping & | ||
RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { | ||
const unsigned Opc = MI.getOpcode(); | ||
|
@@ -159,11 +209,53 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { | |
case TargetOpcode::G_ANYEXT: | ||
case TargetOpcode::G_SEXT: | ||
case TargetOpcode::G_ZEXT: | ||
case TargetOpcode::G_LOAD: | ||
case TargetOpcode::G_SEXTLOAD: | ||
case TargetOpcode::G_ZEXTLOAD: | ||
case TargetOpcode::G_STORE: | ||
break; | ||
case TargetOpcode::G_LOAD: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); | ||
// Use FPR64 for s64 loads on rv32. | ||
if (GPRSize == 32 && Ty.getSizeInBits() == 64) { | ||
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); | ||
OperandsMapping = | ||
getOperandsMapping({getFPValueMapping(64), GPRValueMapping}); | ||
break; | ||
} | ||
|
||
// Check if that load feeds fp instructions. | ||
// In that case, we want the default mapping to be on FPR | ||
// instead of blind map every scalar to GPR. | ||
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), | ||
[&](const MachineInstr &UseMI) { | ||
// If we have at least one direct use in a FP instruction, | ||
// assume this was a floating point load in the IR. If it was | ||
// not, we would have had a bitcast before reaching that | ||
// instruction. | ||
return onlyUsesFP(UseMI); | ||
})) { | ||
OperandsMapping = getOperandsMapping( | ||
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping}); | ||
} | ||
|
||
break; | ||
} | ||
case TargetOpcode::G_STORE: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. getType can return LLT{}. Should we fail if !isValid()? |
||
// Use FPR64 for s64 stores on rv32. | ||
if (GPRSize == 32 && Ty.getSizeInBits() == 64) { | ||
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); | ||
OperandsMapping = | ||
getOperandsMapping({getFPValueMapping(64), GPRValueMapping}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On rv32, are you guaranteed to have FPR64? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No that's not a guarantee architecturally. But if s64 got through the legalizer then it must be supported. |
||
break; | ||
} | ||
|
||
MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg()); | ||
if (onlyDefinesFP(*DefMI)) { | ||
OperandsMapping = getOperandsMapping( | ||
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping}); | ||
} | ||
break; | ||
} | ||
case TargetOpcode::G_CONSTANT: | ||
case TargetOpcode::G_FRAME_INDEX: | ||
case TargetOpcode::G_GLOBAL_VALUE: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -mtriple=riscv32 -mattr=+d -run-pass=regbankselect \ | ||
# RUN: -simplify-mir -verify-machineinstrs %s \ | ||
# RUN: -o - | FileCheck %s --check-prefixes=CHECK,RV32 | ||
# RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=regbankselect \ | ||
# RUN: -simplify-mir -verify-machineinstrs %s \ | ||
# RUN: -o - | FileCheck %s --check-prefixes=CHECK,RV64 | ||
|
||
--- | ||
name: fp_store_fp_def_f32 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_f, $f11_f | ||
|
||
; CHECK-LABEL: name: fp_store_fp_def_f32 | ||
; CHECK: liveins: $x10, $f10_f, $f11_f | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f11_f | ||
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[COPY1]], [[COPY2]] | ||
; CHECK-NEXT: G_STORE [[FADD]](s32), [[COPY]](p0) :: (store (s32)) | ||
; CHECK-NEXT: PseudoRET | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s32) = COPY $f10_f | ||
%2:_(s32) = COPY $f11_f | ||
%3:_(s32) = G_FADD %1, %2 | ||
G_STORE %3(s32), %0(p0) :: (store (s32)) | ||
PseudoRET | ||
|
||
... | ||
--- | ||
name: fp_store_fp_def_f64 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_d, $f11_d | ||
|
||
; CHECK-LABEL: name: fp_store_fp_def_f64 | ||
; CHECK: liveins: $x10, $f10_d, $f11_d | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $f11_d | ||
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[COPY1]], [[COPY2]] | ||
; CHECK-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64)) | ||
; CHECK-NEXT: PseudoRET | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s64) = COPY $f10_d | ||
%2:_(s64) = COPY $f11_d | ||
%3:_(s64) = G_FADD %1, %2 | ||
G_STORE %3(s64), %0(p0) :: (store (s64)) | ||
PseudoRET | ||
|
||
... | ||
--- | ||
name: fp_store_no_def_f64 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_d | ||
|
||
; RV32-LABEL: name: fp_store_no_def_f64 | ||
; RV32: liveins: $x10, $f10_d | ||
; RV32-NEXT: {{ $}} | ||
; RV32-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; RV32-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; RV32-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store (s64)) | ||
; RV32-NEXT: PseudoRET | ||
; | ||
; RV64-LABEL: name: fp_store_no_def_f64 | ||
; RV64: liveins: $x10, $f10_d | ||
; RV64-NEXT: {{ $}} | ||
; RV64-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; RV64-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; RV64-NEXT: [[COPY2:%[0-9]+]]:gprb(s64) = COPY [[COPY1]](s64) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like there is an extra copy here. Is that needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first copy is created by call lowering, the second copy is created by regbank selection because this patch doesn't see the copy to know it should be an FP store. I'll fix that in another patch. |
||
; RV64-NEXT: G_STORE [[COPY2]](s64), [[COPY]](p0) :: (store (s64)) | ||
; RV64-NEXT: PseudoRET | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s64) = COPY $f10_d | ||
G_STORE %1(s64), %0(p0) :: (store (s64)) | ||
PseudoRET | ||
|
||
... | ||
--- | ||
name: fp_load_fp_use_f32 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_f | ||
|
||
; CHECK-LABEL: name: fp_load_fp_use_f32 | ||
; CHECK: liveins: $x10, $f10_f | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f | ||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) | ||
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[LOAD]], [[COPY1]] | ||
; CHECK-NEXT: $f10_f = COPY [[FADD]](s32) | ||
; CHECK-NEXT: PseudoRET implicit $f10_f | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s32) = COPY $f10_f | ||
%2:_(s32) = G_LOAD %0(p0) :: (load (s32)) | ||
%3:_(s32) = G_FADD %2, %1 | ||
$f10_f = COPY %3(s32) | ||
PseudoRET implicit $f10_f | ||
|
||
... | ||
--- | ||
name: fp_load_fp_use_f64 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_d | ||
|
||
; CHECK-LABEL: name: fp_load_fp_use_f64 | ||
; CHECK: liveins: $x10, $f10_d | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) | ||
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[LOAD]], [[COPY1]] | ||
; CHECK-NEXT: $f10_d = COPY [[FADD]](s64) | ||
; CHECK-NEXT: PseudoRET implicit $f10_d | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s64) = COPY $f10_d | ||
%2:_(s64) = G_LOAD %0(p0) :: (load (s64)) | ||
%3:_(s64) = G_FADD %2, %1 | ||
$f10_d = COPY %3(s64) | ||
PseudoRET implicit $f10_d | ||
|
||
... | ||
--- | ||
name: fp_load_no_use_f64 | ||
legalized: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.1: | ||
liveins: $x10, $f10_d | ||
|
||
; RV32-LABEL: name: fp_load_no_use_f64 | ||
; RV32: liveins: $x10, $f10_d | ||
; RV32-NEXT: {{ $}} | ||
; RV32-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; RV32-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; RV32-NEXT: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) | ||
; RV32-NEXT: $f10_d = COPY [[LOAD]](s64) | ||
; RV32-NEXT: PseudoRET implicit $f10_d | ||
; | ||
; RV64-LABEL: name: fp_load_no_use_f64 | ||
; RV64: liveins: $x10, $f10_d | ||
; RV64-NEXT: {{ $}} | ||
; RV64-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; RV64-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; RV64-NEXT: [[LOAD:%[0-9]+]]:gprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) | ||
; RV64-NEXT: $f10_d = COPY [[LOAD]](s64) | ||
; RV64-NEXT: PseudoRET implicit $f10_d | ||
%0:_(p0) = COPY $x10 | ||
%1:_(s64) = COPY $f10_d | ||
%2:_(s64) = G_LOAD %0(p0) :: (load (s64)) | ||
$f10_d = COPY %2(s64) | ||
PseudoRET implicit $f10_d | ||
|
||
... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getType
can returnLLT{}
. Should we fail if!isValid()
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the MIR be ill-formed if the register didn't have a type?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLT is not valid if the G_LOAD/G_STORE is on a physical register. I thought it might be well-formed to write:
but this seems to fail sooner:
Looks like we don't need an isValid check.