Skip to content

Commit afd42fb

Browse files
authored
[AMDGPU][True16][CodeGen] Support AND/OR/XOR and LDEXP True16 format (#102620)
Support the AND/OR/XOR true16 format and the LDEXP true16/fake16 formats. These instructions were previously implemented with the fake16 profile; this change fixes the implementation. It also adds an RA hint so that, when a 16-bit register is used in a 32-bit instruction, the register allocator tries to use the register directly without an extra 16-bit move. --------- Co-authored-by: guochen2 <[email protected]>
1 parent 248e885 commit afd42fb

14 files changed

+1837
-818
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -161,18 +161,34 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
161161

162162
// TODO: Skip masking high bits if def is known boolean.
163163

164-
bool IsSGPR = TRI.isSGPRClass(SrcRC);
165-
unsigned AndOpc =
166-
IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
167-
auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
168-
.addImm(1)
169-
.addReg(SrcReg);
170-
if (IsSGPR)
171-
And.setOperandDead(3); // Dead scc
172-
173-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
174-
.addImm(0)
175-
.addReg(MaskedReg);
164+
if (AMDGPU::getRegBitWidth(SrcRC->getID()) == 16) {
165+
assert(Subtarget->useRealTrue16Insts());
166+
const int64_t NoMods = 0;
167+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
168+
.addImm(NoMods)
169+
.addImm(1)
170+
.addImm(NoMods)
171+
.addReg(SrcReg)
172+
.addImm(NoMods);
173+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
174+
.addImm(NoMods)
175+
.addImm(0)
176+
.addImm(NoMods)
177+
.addReg(MaskedReg)
178+
.addImm(NoMods);
179+
} else {
180+
bool IsSGPR = TRI.isSGPRClass(SrcRC);
181+
unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
182+
auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
183+
.addImm(1)
184+
.addReg(SrcReg);
185+
if (IsSGPR)
186+
And.setOperandDead(3); // Dead scc
187+
188+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
189+
.addImm(0)
190+
.addReg(MaskedReg);
191+
}
176192
}
177193

178194
if (!MRI->getRegClassOrNull(SrcReg))
@@ -2206,6 +2222,16 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
22062222
return false;
22072223
}
22082224

2225+
if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2226+
assert(STI.useRealTrue16Insts());
2227+
const DebugLoc &DL = I.getDebugLoc();
2228+
MachineBasicBlock *MBB = I.getParent();
2229+
BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
2230+
.addReg(SrcReg, 0, AMDGPU::lo16);
2231+
I.eraseFromParent();
2232+
return true;
2233+
}
2234+
22092235
if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
22102236
MachineBasicBlock *MBB = I.getParent();
22112237
const DebugLoc &DL = I.getDebugLoc();

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,8 @@ def : GCNPat <
20302030
>;
20312031

20322032
foreach fp16vt = [f16, bf16] in {
2033+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2034+
let SubtargetPredicate = p in {
20332035
def : GCNPat <
20342036
(fabs (fp16vt VGPR_32:$src)),
20352037
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2044,6 +2046,24 @@ def : GCNPat <
20442046
(fneg (fabs (fp16vt VGPR_32:$src))),
20452047
(V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
20462048
>;
2049+
}
2050+
2051+
let SubtargetPredicate = UseRealTrue16Insts in {
2052+
def : GCNPat <
2053+
(fabs (fp16vt VGPR_16:$src)),
2054+
(V_AND_B16_t16_e64 (i32 0), (i16 0x7fff), (i32 0), VGPR_16:$src)
2055+
>;
2056+
2057+
def : GCNPat <
2058+
(fneg (fp16vt VGPR_16:$src)),
2059+
(V_XOR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src)
2060+
>;
2061+
2062+
def : GCNPat <
2063+
(fneg (fabs (fp16vt VGPR_16:$src))),
2064+
(V_OR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src) // Set sign bit
2065+
>;
2066+
} // End SubtargetPredicate = UseRealTrue16Insts
20472067
} // End foreach fp16vt = ...
20482068

20492069
def : GCNPat <

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
152152
if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
153153
!AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
154154
return false;
155+
156+
if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
157+
!AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
158+
return false;
155159
}
156160
}
157161
return true;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,8 @@ def : GCNPat <
13971397

13981398
} // End OtherPredicates = [isGFX8Plus]
13991399

1400-
let OtherPredicates = [isGFX8Plus] in {
1400+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1401+
let OtherPredicates = [isGFX8Plus, p] in {
14011402
def : GCNPat<
14021403
(i32 (anyext i16:$src)),
14031404
(COPY $src)
@@ -1420,7 +1421,43 @@ def : GCNPat <
14201421
(EXTRACT_SUBREG $src, sub0)
14211422
>;
14221423

1423-
} // End OtherPredicates = [isGFX8Plus]
1424+
} // End OtherPredicates = [isGFX8Plus, p]
1425+
1426+
let OtherPredicates = [UseFakeTrue16Insts] in {
1427+
def : GCNPat<
1428+
(i32 (DivergentUnaryFrag<anyext> i16:$src)),
1429+
(COPY $src)
1430+
>;
1431+
} // End OtherPredicates = [UseFakeTrue16Insts]
1432+
1433+
1434+
let OtherPredicates = [UseRealTrue16Insts] in {
1435+
def : GCNPat<
1436+
(i32 (UniformUnaryFrag<anyext> (i16 SReg_32:$src))),
1437+
(COPY $src)
1438+
>;
1439+
1440+
def : GCNPat<
1441+
(i32 (DivergentUnaryFrag<anyext> i16:$src)),
1442+
(REG_SEQUENCE VGPR_32, $src, lo16, (i16 (IMPLICIT_DEF)), hi16)
1443+
>;
1444+
1445+
def : GCNPat<
1446+
(i64 (anyext i16:$src)),
1447+
(REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
1448+
>;
1449+
1450+
def : GCNPat<
1451+
(i16 (trunc i32:$src)),
1452+
(EXTRACT_SUBREG $src, lo16)
1453+
>;
1454+
1455+
def : GCNPat <
1456+
(i16 (trunc i64:$src)),
1457+
(EXTRACT_SUBREG $src, lo16)
1458+
>;
1459+
1460+
} // End OtherPredicates = [UseRealTrue16Insts]
14241461

14251462
//===----------------------------------------------------------------------===//
14261463
// GFX9

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -922,18 +922,25 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
922922
let HasSrc1FloatMods = 0;
923923
let Src1ModSDWA = Int16SDWAInputMods;
924924
}
925-
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
925+
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
926+
let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
927+
let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
928+
let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
929+
}
930+
def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
926931
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
927932
let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
928-
let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
933+
let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
929934
}
930935

931936
let isReMaterializable = 1 in {
932937
let FPDPRounding = 1 in {
933938
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
934939
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
935-
let SubtargetPredicate = HasTrue16BitInsts in
940+
let SubtargetPredicate = UseRealTrue16Insts in
936941
defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
942+
let SubtargetPredicate = UseFakeTrue16Insts in
943+
defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
937944
} // End FPDPRounding = 1
938945
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
939946
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -968,14 +975,30 @@ class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.
968975
let OtherPredicates = [NotHasTrue16BitInsts] in
969976
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
970977

971-
let OtherPredicates = [HasTrue16BitInsts] in
972-
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
978+
class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
979+
(P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
980+
(i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
981+
(inst $src0_modifiers, $src0,
982+
$src1_modifiers, $src1,
983+
$clamp, /* clamp */
984+
$omod, /* omod */
985+
0) /* op_sel */
986+
>;
987+
988+
let OtherPredicates = [UseRealTrue16Insts] in
989+
def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
990+
991+
let OtherPredicates = [UseFakeTrue16Insts] in
992+
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;
973993

974994
let SubtargetPredicate = isGFX11Plus in {
975995
let isCommutable = 1 in {
976-
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
977-
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
978-
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
996+
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
997+
defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
998+
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
999+
defm V_OR_B16_fake16 : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
1000+
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
1001+
defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
9791002
} // End isCommutable = 1
9801003
} // End SubtargetPredicate = isGFX11Plus
9811004

@@ -1714,6 +1737,7 @@ defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
17141737
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
17151738
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
17161739
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
1740+
defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
17171741
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
17181742
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
17191743
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,8 +1227,11 @@ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
12271227
defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
12281228
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
12291229
defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
1230+
defm V_AND_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
12301231
defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
1232+
defm V_OR_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
12311233
defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
1234+
defm V_XOR_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
12321235

12331236
//===----------------------------------------------------------------------===//
12341237
// GFX10.

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@ body: |
4949
; GFX11: liveins: $vgpr0
5050
; GFX11-NEXT: {{ $}}
5151
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
52-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
52+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
5353
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
54-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
55-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
54+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
55+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
56+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
5657
;
5758
; GFX11-FAKE16-LABEL: name: fceil_s16_vv
5859
; GFX11-FAKE16: liveins: $vgpr0
@@ -89,8 +90,9 @@ body: |
8990
; GFX11-NEXT: {{ $}}
9091
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
9192
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
92-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
93-
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
93+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
94+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
95+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
9496
;
9597
; GFX11-FAKE16-LABEL: name: fceil_s16_vs
9698
; GFX11-FAKE16: liveins: $sgpr0
@@ -126,10 +128,11 @@ body: |
126128
; GFX11: liveins: $vgpr0
127129
; GFX11-NEXT: {{ $}}
128130
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
129-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
131+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
130132
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
131-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
132-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
133+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
134+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
135+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
133136
;
134137
; GFX11-FAKE16-LABEL: name: fceil_fneg_s16_vv
135138
; GFX11-FAKE16: liveins: $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ body: |
5858
; GFX11: liveins: $vgpr0
5959
; GFX11-NEXT: {{ $}}
6060
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
61+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
6262
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
63-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
64-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
63+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
64+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
65+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
6566
;
6667
; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
6768
; GFX11-FAKE16: liveins: $vgpr0
@@ -98,8 +99,9 @@ body: |
9899
; GFX11-NEXT: {{ $}}
99100
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
100101
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
101-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
102-
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
102+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
103+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
104+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
103105
;
104106
; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
105107
; GFX11-FAKE16: liveins: $sgpr0
@@ -135,10 +137,11 @@ body: |
135137
; GFX11: liveins: $vgpr0
136138
; GFX11-NEXT: {{ $}}
137139
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
140+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
139141
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
140-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
141-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
142+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
143+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
144+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
142145
;
143146
; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
144147
; GFX11-FAKE16: liveins: $vgpr0

0 commit comments

Comments
 (0)