Skip to content

Commit 53af501

Browse files
[WIP][PowerPC] Add phony subregisters to cover the high half of the VSX registers.
On PowerPC we have VSX registers which overlap with floating point registers. However, the floating point registers only overlap with half of each VSX register while the other half is never used alone. This patch adds phony registers for the other half of the VSX registers in order to fully cover them and to make sure that the lane masks are not the same for the VSX and the floating point registers. Note: This patch is still a work in progress, as there are a number of LIT failures that need to be investigated.
1 parent e84ecf2 commit 53af501

File tree

4 files changed

+117
-28
lines changed

4 files changed

+117
-28
lines changed

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,91 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
435435
}
436436
}
437437

438-
assert(checkAllSuperRegsMarked(Reserved));
438+
// Mark phony registers for the VSR high bits as reserved so that they are
439+
// not used.
440+
Reserved.set(PPC::FH0);
441+
Reserved.set(PPC::FH1);
442+
Reserved.set(PPC::FH2);
443+
Reserved.set(PPC::FH3);
444+
Reserved.set(PPC::FH4);
445+
Reserved.set(PPC::FH5);
446+
Reserved.set(PPC::FH6);
447+
Reserved.set(PPC::FH7);
448+
Reserved.set(PPC::FH8);
449+
Reserved.set(PPC::FH9);
450+
Reserved.set(PPC::FH10);
451+
Reserved.set(PPC::FH11);
452+
Reserved.set(PPC::FH12);
453+
Reserved.set(PPC::FH13);
454+
Reserved.set(PPC::FH14);
455+
Reserved.set(PPC::FH15);
456+
Reserved.set(PPC::FH16);
457+
Reserved.set(PPC::FH17);
458+
Reserved.set(PPC::FH18);
459+
Reserved.set(PPC::FH19);
460+
Reserved.set(PPC::FH20);
461+
Reserved.set(PPC::FH21);
462+
Reserved.set(PPC::FH22);
463+
Reserved.set(PPC::FH23);
464+
Reserved.set(PPC::FH24);
465+
Reserved.set(PPC::FH25);
466+
Reserved.set(PPC::FH26);
467+
Reserved.set(PPC::FH27);
468+
Reserved.set(PPC::FH28);
469+
Reserved.set(PPC::FH29);
470+
Reserved.set(PPC::FH30);
471+
Reserved.set(PPC::FH31);
472+
473+
Reserved.set(PPC::VFH0);
474+
Reserved.set(PPC::VFH1);
475+
Reserved.set(PPC::VFH2);
476+
Reserved.set(PPC::VFH3);
477+
Reserved.set(PPC::VFH4);
478+
Reserved.set(PPC::VFH5);
479+
Reserved.set(PPC::VFH6);
480+
Reserved.set(PPC::VFH7);
481+
Reserved.set(PPC::VFH8);
482+
Reserved.set(PPC::VFH9);
483+
Reserved.set(PPC::VFH10);
484+
Reserved.set(PPC::VFH11);
485+
Reserved.set(PPC::VFH12);
486+
Reserved.set(PPC::VFH13);
487+
Reserved.set(PPC::VFH14);
488+
Reserved.set(PPC::VFH15);
489+
Reserved.set(PPC::VFH16);
490+
Reserved.set(PPC::VFH17);
491+
Reserved.set(PPC::VFH18);
492+
Reserved.set(PPC::VFH19);
493+
Reserved.set(PPC::VFH20);
494+
Reserved.set(PPC::VFH21);
495+
Reserved.set(PPC::VFH22);
496+
Reserved.set(PPC::VFH23);
497+
Reserved.set(PPC::VFH24);
498+
Reserved.set(PPC::VFH25);
499+
Reserved.set(PPC::VFH26);
500+
Reserved.set(PPC::VFH27);
501+
Reserved.set(PPC::VFH28);
502+
Reserved.set(PPC::VFH29);
503+
Reserved.set(PPC::VFH30);
504+
Reserved.set(PPC::VFH31);
505+
506+
assert(checkAllSuperRegsMarked(Reserved,
507+
{PPC::FH0, PPC::FH1, PPC::FH2, PPC::FH3,
508+
PPC::FH4, PPC::FH5, PPC::FH6, PPC::FH7,
509+
PPC::FH8, PPC::FH9, PPC::FH10, PPC::FH11,
510+
PPC::FH12, PPC::FH13, PPC::FH14, PPC::FH15,
511+
PPC::FH16, PPC::FH17, PPC::FH18, PPC::FH19,
512+
PPC::FH20, PPC::FH21, PPC::FH22, PPC::FH23,
513+
PPC::FH24, PPC::FH25, PPC::FH26, PPC::FH27,
514+
PPC::FH28, PPC::FH29, PPC::FH30, PPC::FH31,
515+
PPC::VFH0, PPC::VFH1, PPC::VFH2, PPC::VFH3,
516+
PPC::VFH4, PPC::VFH5, PPC::VFH6, PPC::VFH7,
517+
PPC::VFH8, PPC::VFH9, PPC::VFH10, PPC::VFH11,
518+
PPC::VFH12, PPC::VFH13, PPC::VFH14, PPC::VFH15,
519+
PPC::VFH16, PPC::VFH17, PPC::VFH18, PPC::VFH19,
520+
PPC::VFH20, PPC::VFH21, PPC::VFH22, PPC::VFH23,
521+
PPC::VFH24, PPC::VFH25, PPC::VFH26, PPC::VFH27,
522+
PPC::VFH28, PPC::VFH29, PPC::VFH30, PPC::VFH31}));
439523
return Reserved;
440524
}
441525

llvm/lib/Target/PowerPC/PPCRegisterInfo.td

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>;
1717
def sub_32 : SubRegIndex<32>;
1818
def sub_32_hi_phony : SubRegIndex<32,32>;
1919
def sub_64 : SubRegIndex<64>;
20+
def sub_64_hi_phony : SubRegIndex<64,64>;
2021
def sub_vsx0 : SubRegIndex<128>;
2122
def sub_vsx1 : SubRegIndex<128, 128>;
2223
def sub_gp8_x0 : SubRegIndex<64>;
@@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> {
7778
}
7879

7980
// VR - One of the 32 128-bit vector registers
80-
class VR<VF SubReg, string n> : PPCReg<n> {
81+
class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> {
8182
let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
8283
let HWEncoding{5} = 0;
83-
let SubRegs = [SubReg];
84-
let SubRegIndices = [sub_64];
84+
let SubRegs = [SubReg, SubRegH];
85+
let SubRegIndices = [sub_64, sub_64_hi_phony];
8586
}
8687

8788
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
8889
// floating-point registers.
89-
class VSRL<FPR SubReg, string n> : PPCReg<n> {
90+
class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> {
9091
let HWEncoding = SubReg.HWEncoding;
91-
let SubRegs = [SubReg];
92-
let SubRegIndices = [sub_64];
92+
let SubRegs = [SubReg, SubRegH];
93+
let SubRegIndices = [sub_64, sub_64_hi_phony];
9394
}
9495

9596
// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
@@ -155,6 +156,12 @@ foreach Index = 0-31 in {
155156
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
156157
}
157158

159+
let isArtificial = 1 in {
160+
foreach Index = 0-31 in {
161+
def FH#Index : FPR<-1, "">;
162+
}
163+
}
164+
158165
// Floating-point pair registers
159166
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
160167
def Fpair#Index : FPPair<"fp"#Index, Index>;
@@ -168,15 +175,21 @@ foreach Index = 0-31 in {
168175
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
169176
}
170177

178+
let isArtificial = 1 in {
179+
foreach Index = 0-31 in {
180+
def VFH#Index : VF<-1, "">;
181+
}
182+
}
183+
171184
// Vector registers
172185
foreach Index = 0-31 in {
173-
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
174-
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
186+
def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>,
187+
DwarfRegNum<[!add(Index, 77)]>;
175188
}
176189

177190
// VSX registers
178191
foreach Index = 0-31 in {
179-
def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
192+
def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>,
180193
DwarfRegAlias<!cast<FPR>("F"#Index)>;
181194
}
182195

llvm/test/CodeGen/PowerPC/frem.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
7070
; CHECK-NEXT: xscvspdpn 2, 0
7171
; CHECK-NEXT: bl fmodf
7272
; CHECK-NEXT: nop
73-
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
7473
; CHECK-NEXT: xxmrghd 0, 1, 61
7574
; CHECK-NEXT: xscvspdpn 1, 62
7675
; CHECK-NEXT: xscvspdpn 2, 63
@@ -84,7 +83,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
8483
; CHECK-NEXT: xscvspdpn 2, 0
8584
; CHECK-NEXT: bl fmodf
8685
; CHECK-NEXT: nop
87-
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
8886
; CHECK-NEXT: xxmrghd 0, 61, 1
8987
; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload
9088
; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,11 +122,8 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
124122
; CHECK-NEXT: xscpsgndp 61, 1, 1
125123
; CHECK-NEXT: xxswapd 1, 62
126124
; CHECK-NEXT: xxswapd 2, 63
127-
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
128-
; CHECK-NEXT: # kill: def $f2 killed $f2 killed $vsl2
129125
; CHECK-NEXT: bl fmod
130126
; CHECK-NEXT: nop
131-
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
132127
; CHECK-NEXT: xxmrghd 34, 61, 1
133128
; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
134129
; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload

llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,18 @@
55

66
# Keep track of all of the lanemasks for various subregisters.
77
#
8-
# TODO: The mask for %6.sub_vsx1:accrc is the same as the mask for %10.sub_vsx1_then_sub_64:accrc.
9-
# Ideally on PowerPC these masks should be different. To be addressed in a later patch.
10-
#
11-
# CHECK: %3 [80r,80d:0) 0@80r L0000000000000004 [80r,80d:0) 0@80r weight:0.000000e+00
12-
# CHECK: %4 [96r,96d:0) 0@96r L0000000000000800 [96r,96d:0) 0@96r weight:0.000000e+00
13-
# CHECK: %5 [112r,112d:0) 0@112r L0000000000000004 [112r,112d:0) 0@112r weight:0.000000e+00
14-
# CHECK: %6 [128r,128d:0) 0@128r L0000000000000800 [128r,128d:0) 0@128r weight:0.000000e+00
8+
# CHECK: %3 [80r,80d:0) 0@80r L000000000000000C [80r,80d:0) 0@80r weight:0.000000e+00
9+
# CHECK: %4 [96r,96d:0) 0@96r L0000000000003000 [96r,96d:0) 0@96r weight:0.000000e+00
10+
# CHECK: %5 [112r,112d:0) 0@112r L000000000000000C [112r,112d:0) 0@112r weight:0.000000e+00
11+
# CHECK: %6 [128r,128d:0) 0@128r L0000000000003000 [128r,128d:0) 0@128r weight:0.000000e+00
1512
# CHECK: %7 [144r,144d:0) 0@144r L0000000000000004 [144r,144d:0) 0@144r weight:0.000000e+00
16-
# CHECK: %8 [160r,160d:0) 0@160r L0000000000000800 [160r,160d:0) 0@160r weight:0.000000e+00
13+
# CHECK: %8 [160r,160d:0) 0@160r L0000000000001000 [160r,160d:0) 0@160r weight:0.000000e+00
1714
# CHECK: %9 [176r,176d:0) 0@176r L0000000000000004 [176r,176d:0) 0@176r weight:0.000000e+00
18-
# CHECK: %10 [192r,192d:0) 0@192r L0000000000000800 [192r,192d:0) 0@192r weight:0.000000e+00
19-
# CHECK: %11 [208r,208d:0) 0@208r L0000000000001000 [208r,208d:0) 0@208r weight:0.000000e+00
20-
# CHECK: %12 [224r,224d:0) 0@224r L0000000000002000 [224r,224d:0) 0@224r weight:0.000000e+00
21-
# CHECK: %13 [240r,240d:0) 0@240r L0000000000000804 [240r,240d:0) 0@240r weight:0.000000e+00
22-
# CHECK: %14 [256r,256d:0) 0@256r L0000000000003000 [256r,256d:0) 0@256r weight:0.000000e+00
15+
# CHECK: %10 [192r,192d:0) 0@192r L0000000000001000 [192r,192d:0) 0@192r weight:0.000000e+00
16+
# CHECK: %11 [208r,208d:0) 0@208r L0000000000004000 [208r,208d:0) 0@208r weight:0.000000e+00
17+
# CHECK: %12 [224r,224d:0) 0@224r L0000000000010000 [224r,224d:0) 0@224r weight:0.000000e+00
18+
# CHECK: %13 [240r,240d:0) 0@240r L000000000000300C [240r,240d:0) 0@240r weight:0.000000e+00
19+
# CHECK: %14 [256r,256d:0) 0@256r L000000000003C000 [256r,256d:0) 0@256r weight:0.000000e+00
2320

2421

2522
# CHECK: 0B bb.0

0 commit comments

Comments
 (0)