Skip to content

Commit a444324

Browse files
stefanp-synopsystru
authored and committed
[PowerPC] Add phony subregisters to cover the high half of the VSX registers. (#94628)
On PowerPC there are 128-bit VSX registers. These registers are half overlapped with the 64-bit floating-point registers (FPR). The 64-bit half of the VSX register that does not overlap with the FPR does not overlap with any other register class. The FPRs are the only subregisters of the VSX registers, but they do not fully cover the 128-bit super register. This leads to incorrect lane masks being created. This patch adds phony registers for the other half of the VSX registers in order to fully cover them and to make sure that the lane masks are not the same for the VSX and the floating-point registers. (cherry picked from commit 53c37f3)
1 parent 56f4ade commit a444324

31 files changed

+61
-670
lines changed

llvm/lib/Target/PowerPC/PPCRegisterInfo.td

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>;
1717
def sub_32 : SubRegIndex<32>;
1818
def sub_32_hi_phony : SubRegIndex<32,32>;
1919
def sub_64 : SubRegIndex<64>;
20+
def sub_64_hi_phony : SubRegIndex<64,64>;
2021
def sub_vsx0 : SubRegIndex<128>;
2122
def sub_vsx1 : SubRegIndex<128, 128>;
2223
def sub_gp8_x0 : SubRegIndex<64>;
@@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> {
7778
}
7879

7980
// VR - One of the 32 128-bit vector registers
80-
class VR<VF SubReg, string n> : PPCReg<n> {
81+
class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> {
8182
let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
8283
let HWEncoding{5} = 0;
83-
let SubRegs = [SubReg];
84-
let SubRegIndices = [sub_64];
84+
let SubRegs = [SubReg, SubRegH];
85+
let SubRegIndices = [sub_64, sub_64_hi_phony];
8586
}
8687

8788
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
8889
// floating-point registers.
89-
class VSRL<FPR SubReg, string n> : PPCReg<n> {
90+
class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> {
9091
let HWEncoding = SubReg.HWEncoding;
91-
let SubRegs = [SubReg];
92-
let SubRegIndices = [sub_64];
92+
let SubRegs = [SubReg, SubRegH];
93+
let SubRegIndices = [sub_64, sub_64_hi_phony];
9394
}
9495

9596
// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
@@ -155,6 +156,22 @@ foreach Index = 0-31 in {
155156
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
156157
}
157158

159+
// The FH and VFH registers have been marked as Artificial because there are no
160+
// instructions on PowerPC that use those register classes. They only exist
161+
// in order to ensure that the super registers (V and VSL) are covered by their
162+
// subregisters and have correct subregister lane masks.
163+
let isArtificial = 1 in {
164+
foreach Index = 0-31 in {
165+
def FH#Index : FPR<-1, "">;
166+
def VFH#Index : VF<-1, "">;
167+
}
168+
}
169+
170+
let isAllocatable = 0, CopyCost = -1 in {
171+
def VFHRC : RegisterClass<"PPC", [f64], 64, (sequence "VFH%u", 0, 31)>;
172+
def FHRC : RegisterClass<"PPC", [f64], 64, (sequence "FH%u", 0, 31)>;
173+
}
174+
158175
// Floating-point pair registers
159176
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
160177
def Fpair#Index : FPPair<"fp"#Index, Index>;
@@ -168,17 +185,19 @@ foreach Index = 0-31 in {
168185
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
169186
}
170187

188+
let CoveredBySubRegs = 1 in {
171189
// Vector registers
172190
foreach Index = 0-31 in {
173-
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
191+
def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>,
174192
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
175193
}
176194

177195
// VSX registers
178196
foreach Index = 0-31 in {
179-
def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
197+
def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>,
180198
DwarfRegAlias<!cast<FPR>("F"#Index)>;
181199
}
200+
}
182201

183202
// Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for
184203
// asm printing.

llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -750,25 +750,21 @@ entry:
750750
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
751751
; CHECK-64-LABEL: testDoubleImm1:
752752
; CHECK-64: # %bb.0: # %entry
753-
; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
754753
; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1
755754
; CHECK-64-NEXT: blr
756755
;
757756
; CHECK-32-LABEL: testDoubleImm1:
758757
; CHECK-32: # %bb.0: # %entry
759-
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
760758
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
761759
; CHECK-32-NEXT: blr
762760
;
763761
; CHECK-64-P10-LABEL: testDoubleImm1:
764762
; CHECK-64-P10: # %bb.0: # %entry
765-
; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
766763
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
767764
; CHECK-64-P10-NEXT: blr
768765
;
769766
; CHECK-32-P10-LABEL: testDoubleImm1:
770767
; CHECK-32-P10: # %bb.0: # %entry
771-
; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
772768
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
773769
; CHECK-32-P10-NEXT: blr
774770
entry:

llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,6 @@ define double @getd1(<2 x double> %vd) {
10991099
; CHECK-LABEL: getd1:
11001100
; CHECK: # %bb.0: # %entry
11011101
; CHECK-NEXT: xxswapd 1, 34
1102-
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
11031102
; CHECK-NEXT: blr
11041103
entry:
11051104
%vecext = extractelement <2 x double> %vd, i32 1
@@ -1115,7 +1114,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
11151114
; CHECK-NEXT: lvsl 3, 0, 3
11161115
; CHECK-NEXT: vperm 2, 2, 2, 3
11171116
; CHECK-NEXT: xxlor 1, 34, 34
1118-
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
11191117
; CHECK-NEXT: blr
11201118
entry:
11211119
%vecext = extractelement <2 x double> %vd, i32 %i

0 commit comments

Comments
 (0)