llvm · syzaara · Apr 17, 2024 · Mar 18, 2024 · Apr 4, 2024 · Apr 9, 2024
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -881,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
 // 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
       (instrs
-    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL8, LI, LI8,
+    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, ADDItocL8, LI, LI8,
     ADDIC, ADDIC8,
     ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
     ADDME, ADDME8,

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1147,15 +1147,27 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
 
     MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO);
 
-    // Always use TOC on AIX. Map the global address operand to be a reference
-    // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
-    // reference the storage allocated in the TOC which contains the address of
-    // 'MOSymbol'.
-    MCSymbol *TOCEntry =
-        lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
-    const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
-                                                MCSymbolRefExpr::VK_PPC_U,
-                                                OutContext);
+    // If the symbol isn't toc-data then use the TOC on AIX.
+    // Map the global address operand to be a reference to the TOC entry we
+    // will synthesize later. 'TOCEntry' is a label used to reference the
+    // storage allocated in the TOC which contains the address of 'MOSymbol'.
+    // If the toc-data attribute is used, the TOC entry contains the data
+    // rather than the address of the MOSymbol.
+    if (![](const MachineOperand &MO) {
+          if (!MO.isGlobal())
+            return false;
+
+          const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal());
+          if (!GV)
+            return false;
+
+          return GV->hasAttribute("toc-data");
+        }(MO)) {
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
+    }
+
+    const MCExpr *Exp = MCSymbolRefExpr::create(
+        MOSymbol, MCSymbolRefExpr::VK_PPC_U, OutContext);
     TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
@@ -1272,25 +1284,32 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
+  case PPC::ADDItocL:
   case PPC::ADDItocL8: {
-    // Transform %xd = ADDItocL8 %xs, @sym
+    // Transform %xd = ADDItocL %xs, @sym
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
 
-    // Change the opcode to ADDI8. If the global address is external, then
-    // generate a TOC entry and reference that. Otherwise, reference the
-    // symbol directly.
-    TmpInst.setOpcode(PPC::ADDI8);
+    unsigned Op = MI->getOpcode();
+
+    // Change the opcode: ADDItocL8 becomes ADDI8 and ADDItocL becomes LA.
+    TmpInst.setOpcode(Op == PPC::ADDItocL8 ? PPC::ADDI8 : PPC::LA);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL8.");
+    assert((Op == PPC::ADDItocL8 ? (MO.isGlobal() || MO.isCPI())
+                                 : MO.isGlobal()) &&
+           "Invalid operand for ADDItocL/ADDItocL8.");
+    assert(!(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+           "Interposable definitions must use indirect accesses.");
 
-    LLVM_DEBUG(assert(
-        !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
-        "Interposable definitions must use indirect access."));
+    // Map the operand to its corresponding MCSymbol.
+    const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    const MCExpr *Exp = MCSymbolRefExpr::create(
+        MOSymbol,
+        Op == PPC::ADDItocL8 ? MCSymbolRefExpr::VK_PPC_TOC_LO
+                             : MCSymbolRefExpr::VK_PPC_L,
+        OutContext);
 
-    const MCExpr *Exp =
-        MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
-                                MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
     TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -510,7 +510,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
 }
 
 // Check if a SDValue has the toc-data attribute.
-static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
+static bool hasTocDataAttr(SDValue Val) {
   GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
   if (!GA)
     return false;
@@ -6115,8 +6115,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
 
       assert(isAIXABI && "ELF ABI already handled");
 
-      if (hasTocDataAttr(N->getOperand(0),
-                         CurDAG->getDataLayout().getPointerSize())) {
+      if (hasTocDataAttr(N->getOperand(0))) {
         replaceWith(PPC::ADDItoc, N, MVT::i32);
         return;
       }
@@ -6128,8 +6127,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     if (isPPC64 && CModel == CodeModel::Small) {
       assert(isAIXABI && "ELF ABI handled in common SelectCode");
 
-      if (hasTocDataAttr(N->getOperand(0),
-                         CurDAG->getDataLayout().getPointerSize())) {
+      if (hasTocDataAttr(N->getOperand(0))) {
         replaceWith(PPC::ADDItoc8, N, MVT::i64);
         return;
       }
@@ -6144,23 +6142,44 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
            " ELF/AIX or 32-bit AIX in the following.");
 
     // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
-    // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
-    // generate two instructions as described below. The first source operand
-    // is a symbol reference. If it must be toc-referenced according to
+    // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code for
+    // non-toc-data symbols.
+    // We generate two instructions as described below. The first source
+    // operand is a symbol reference. If it must be toc-referenced according to
     // Subtarget, we generate:
     // [32-bit AIX]
     //   LWZtocL(@sym, ADDIStocHA(%r2, @sym))
     // [64-bit ELF/AIX]
     //   LDtocL(@sym, ADDIStocHA8(%x2, @sym))
     // Otherwise we generate:
     //   ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
+
+    // For large code model toc-data symbols we generate:
+    // [32-bit AIX]
+    //   ADDItocL(ADDIStocHA(%r2, @sym), @sym)
+    // [64-bit AIX]
+    //   Currently not supported.
+
     SDValue GA = N->getOperand(0);
     SDValue TOCbase = N->getOperand(1);
 
     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
     SDNode *Tmp = CurDAG->getMachineNode(
         isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
 
+    // On AIX if the symbol has the toc-data attribute it will be defined
+    // in the TOC entry, so we use an ADDItocL similar to the medium code
+    // model ELF abi.
+    if (isAIXABI && hasTocDataAttr(GA)) {
+      if (isPPC64)
+        report_fatal_error(
+            "64-bit large code model toc-data not yet supported");
+
+      ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, VT,
+                                            SDValue(Tmp, 0), GA));
+      return;
+    }
+
     if (PPCLowering->isAccessedAsGotIndirect(GA)) {
       // If it is accessed as got-indirect, we need an extra LWZ/LD to load
       // the address.

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1077,6 +1077,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
   case PPC::LIS8:
   case PPC::ADDIStocHA:
   case PPC::ADDIStocHA8:
+  case PPC::ADDItocL:
   case PPC::ADDItocL8:
   case PPC::LOAD_STACK_GUARD:
   case PPC::PPCLdFixedAddr:

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3346,11 +3346,13 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr
                        "#ADDIStocHA",
                        [(set i32:$rD,
                          (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>;
-// Local Data Transform
+// TOC Data Transform AIX
 def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
                    "#ADDItoc",
                    [(set i32:$rD,
                      (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDItocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
+                   "#ADDItocL", []>;
 
 // Get Global (GOT) Base Register offset, from the word immediately preceding
 // the function label.

diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -32,7 +32,7 @@
 // {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
 // lvewx, lvx, lxsdx}
 FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8), \
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL, ADDItocL8), \
                FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
                              LVX, LXSDX))
 
@@ -134,13 +134,13 @@ FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
 
 // addis rx,ra,si - addi rt,rx,SI, SI >= 0
 FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
-               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8))
+               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8, ADDIStocHA),
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8, ADDItocL))
 
 // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
 FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
-               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8),
-               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL8, ADDItocL),
+               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8, ADDIStocHA))
 
 // mtctr - { bcctr,bcctrl }
 FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,

diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -12,6 +12,14 @@
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
 
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s \
+; RUN:     -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK32LARGE
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32LARGE
+
+; Global variables i and f have the toc-data attribute.
+; In the following functions, those writing to or reading from
+; variables i and f should use the toc-data access pattern.
+; All remaining variables should use the regular toc access sequence.
 @i = dso_local global i32 0, align 4 #0
 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
 @f = dso_local local_unnamed_addr global float 0x4005BE76C0000000, align 4 #0
@@ -44,6 +52,16 @@ define dso_local void @write_int(i32 signext %in) {
 ; TEST64:           la 4, i[TD](2)
 ; TEST64-NEXT:      stw 3, 0(4)
 
+; CHECK32LARGE: name:            write_int
+; CHECK32LARGE:      %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @i
+; CHECK32LARGE-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH2]] :: (store (s32) into @i)
+
+; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming stw
+; TEST32LARGE:         .write_int:
+; TEST32LARGE:          addis 4, i[TD]@u(2)
+; TEST32LARGE-NEXT:	la 4, i[TD]@l(4)
+; TEST32LARGE-NEXT:	stw 3, 0(4)
 
 define dso_local i64 @read_ll() {
   entry:
@@ -70,6 +88,15 @@ define dso_local i64 @read_ll() {
 ; TEST64:         ld 3, L..C0(2)
 ; TEST64-NEXT:    ld 3, 0(3)
 
+; CHECK32LARGE: name:            read_ll
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @ll
+; CHECK32LARGE: LWZtocL @ll, killed %[[SCRATCH1]] :: (load (s32) from got)
+
+; TEST32LARGE:         .read_ll:
+; TEST32LARGE:          addis 3, L..C0@u(2)
+; TEST32LARGE-NEXT:	lwz 4, L..C0@l(3)
+; TEST32LARGE-NEXT:	lwz 3, 0(4)
+; TEST32LARGE-NEXT:	lwz 4, 4(4)
 
 define dso_local float @read_float() {
   entry:
@@ -96,6 +123,16 @@ define dso_local float @read_float() {
 ; TEST64:         la 3, f[TD](2)
 ; TEST64-NEXT:    lfs 1, 0(3)
 
+; CHECK32LARGE: name:            read_float
+; CHECK32LARGE:      %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @f
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @f
+; CHECK32LARGE-NEXT: LFS 0, killed %[[SCRATCH2]] :: (dereferenceable load (s32) from @f)
+
+; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming lfs
+; TEST32LARGE:         .read_float:
+; TEST32LARGE:          addis 3, f[TD]@u(2)
+; TEST32LARGE-NEXT:	la 3, f[TD]@l(3)
+; TEST32LARGE-NEXT:	lfs 1, 0(3)
 
 define dso_local void @write_double(double %in) {
   entry:
@@ -121,6 +158,14 @@ define dso_local void @write_double(double %in) {
 ; TEST64:         ld 3, L..C1(2)
 ; TEST64-NEXT:    stfd 1, 0(3)
 
+; CHECK32LARGE: name:            write_double
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @d
+; CHECK32LARGE: LWZtocL @d, killed %[[SCRATCH1]] :: (load (s32) from got)
+
+; TEST32LARGE:         .write_double:
+; TEST32LARGE:          addis 3, L..C1@u(2)
+; TEST32LARGE-NEXT:	lwz 3, L..C1@l(3)
+; TEST32LARGE-NEXT:	stfd 1, 0(3)
 
 define dso_local nonnull ptr @addr() {
   entry:
@@ -144,6 +189,15 @@ define dso_local nonnull ptr @addr() {
 ; TEST64:       .addr
 ; TEST64:         la 3, i[TD](2)
 
+; CHECK32LARGE: name:            addr
+; CHECK32LARGE:      %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc = ADDItocL killed %[[SCRATCH1]], @i
+; CHECK32LARGE-NEXT: $r3 = COPY %[[SCRATCH2]]
+
+; TEST32LARGE:         .addr:
+; TEST32LARGE:          addis 3, i[TD]@u(2)
+; TEST32LARGE-NEXT:	la 3, i[TD]@l(3)
+
 ; TEST32:         .toc
 ; TEST32:           .tc ll[TC],ll[RW]
 ; TEST32-NOT:       .csect ll[TD]
@@ -170,4 +224,17 @@ define dso_local nonnull ptr @addr() {
 ; TEST64-NEXT:      .globl f[TD]
 ; TEST64-NOT:       .tc f[TD],f[RW]
 
+; TEST32LARGE:         .toc
+; TEST32LARGE:           .tc ll[TE],ll[RW]
+; TEST32LARGE-NOT:       .csect ll[TD]
+; TEST32LARGE:           .tc d[TE],d[RW]
+; TEST32LARGE-NOT:       .csect d[TD],2
+; TEST32LARGE:           .csect i[TD],2
+; TEST32LARGE-NEXT:      .globl  i[TD]
+; TEST32LARGE-NEXT:      .align  2
+; TEST32LARGE-NOT:       .tc i[TE],i[RW]
+; TEST32LARGE:           .csect f[TD],2
+; TEST32LARGE-NEXT:      .globl f[TD]
+; TEST32LARGE-NOT:       .tc f[TE],f[RW]
+
 attributes #0 = { "toc-data" }