Promote 32bit pseudo instr that infer extsw removal to 64bit in PPCMIPeephole #85451

diggerlin · 2024-03-15T19:15:19Z

Bug only happens in 64bit involving spills. Since we don't know when the spill will happen, all instructions in the chain used to deduce sign extension for eliminating 'extsw' will need to be promoted to 64-bit pseudo instructions.

The following instruction will promoted in PPCMIPeepholes: EXTSH, LHA, ISEL to EXTSH8, LHA8, ISEL8

llvmbot · 2024-03-15T19:15:48Z

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

Changes

Patch is 49.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85451.diff

12 Files Affected:

(modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+135)
(modified) llvm/lib/Target/PowerPC/PPCInstrInfo.h (+5)
(modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+2-3)
(modified) llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (+1)
(modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir (+1-1)
(modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir (+4-4)
(added) llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir (+698)
(modified) llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll (+2)
(modified) llvm/test/CodeGen/PowerPC/select-constant-xor.ll (+4)
(modified) llvm/test/CodeGen/PowerPC/sext_elimination.mir (+8-2)
(modified) llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll (+3-1)
(modified) llvm/test/CodeGen/PowerPC/store-forward-be64.ll (+1)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5f5eb31a5a85fa..0e9bdaf37d079d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5219,6 +5219,141 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // We limit the max depth to track incoming values of PHIs or binary ops
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
+
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+                                                  MachineRegisterInfo *MRI,
+                                                  unsigned BinOpDepth,
+                                                  LiveVariables *LV) const {
+  if (MRI->getRegClass(Reg) == &PPC::G8RCRegClass)
+    return;
+
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI)
+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsRelplaceIntr = false;
+  switch (Opcode) {
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::PHI:
+  case PPC::ISEL:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::OR)
+        IsRelplaceIntr = true;
+      unsigned OperandEnd = 3, OperandStride = 1;
+      if (MI->getOpcode() == PPC::PHI) {
+        OperandEnd = MI->getNumOperands();
+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+        assert(MI->getOperand(I).isReg() && "Operand must be register");
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+    }
+    break;
+    // case PPC::COPY:
+  case PPC::ORI:
+  case PPC::XORI:
+  case PPC::ORI8:
+  case PPC::XORI8:
+  case PPC::ORIS:
+  case PPC::XORIS:
+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    if (Opcode == PPC::ORI || Opcode == PPC::XORI || Opcode == PPC::ORIS ||
+        Opcode == PPC::ORIS || Opcode == PPC::XORIS)
+      IsRelplaceIntr = true;
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+    break;
+  }
+  case PPC::AND:
+  case PPC::AND8: {
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::AND)
+        IsRelplaceIntr = true;
+      Register SrcReg1 = MI->getOperand(1).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
+      Register SrcReg2 = MI->getOperand(2).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  if ((TII->isSExt32To64(Opcode) && !TII->isZExt32To64(Opcode)) ||
+      IsRelplaceIntr) {
+    DebugLoc DL = MI->getDebugLoc();
+    auto MBB = MI->getParent();
+
+    // If the oprand of the instruction is Register which isPPC::GRCRegClass, we
+    // need to promot the Oprande to PPC::G8RCRegClass.
+    DenseMap<unsigned, Register> PromoteRegs;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
+      MachineOperand &Oprand = MI->getOperand(i);
+      if (Oprand.isReg()) {
+        Register OprandReg = Oprand.getReg();
+        if (!OprandReg.isVirtual())
+          continue;
+        if (MRI->getRegClass(OprandReg) == &PPC::GPRCRegClass) {
+          Register TmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+          Register DstTmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+
+          BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
+          BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
+              .addReg(TmpReg)
+              .addReg(OprandReg)
+              .addImm(PPC::sub_32);
+          PromoteRegs[i] = DstTmpReg;
+        } else {
+          PromoteRegs[i] = OprandReg;
+        }
+      }
+    }
+
+    Register NewReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+    Register SrcReg = MI->getOperand(0).getReg();
+
+    // Most of the opcode of 64-bit instruction equal to the opcode of 32-bit
+    // version of same instruction plus one. But there are some exception:
+    // PPC::ANDC_rec, PPC::ANDI_rec, PPC::ANDIS_rec.
+    unsigned NewOpcode = Opcode + 1;
+
+    if (Opcode == PPC::ANDC_rec)
+      NewOpcode = PPC::ANDC8_rec;
+    if (Opcode == PPC::ANDI_rec)
+      NewOpcode = PPC::ANDI8_rec;
+    if (Opcode == PPC::ANDIS_rec)
+      NewOpcode = PPC::ANDIS8_rec;
+
+    BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+    MachineBasicBlock::instr_iterator Iter(MI);
+    --Iter;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++)
+      if (PromoteRegs.find(i) != PromoteRegs.end())
+        MachineInstrBuilder(*Iter->getMF(), Iter)
+            .addReg(PromoteRegs[i], RegState::Kill);
+      else
+        Iter->addOperand(MI->getOperand(i));
+
+    for (auto Iter = PromoteRegs.begin(); Iter != PromoteRegs.end(); Iter++)
+      LV->recomputeForSingleDefVirtReg(Iter->second);
+    MI->eraseFromParent();
+    BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
+        .addReg(NewReg, RegState::Kill, PPC::sub_32);
+    LV->recomputeForSingleDefVirtReg(NewReg);
+    return;
+  }
+  return;
+}
+
 // The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
 // does not count all of the recursions. The parameter BinOpDepth is incremented
 // only when isSignOrZeroExtended calls itself more than once. This is done to
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 045932dc0d3ba1..f6e79707913c7b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -17,6 +17,7 @@
 #include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
@@ -610,6 +611,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
+  void replaceInstrAfterElimExt32To64(const Register &Reg,
+                                      MachineRegisterInfo *MRI,
+                                      unsigned BinOpDepth,
+                                      LiveVariables *LV) const;
 
   bool convertToImmediateForm(MachineInstr &MI,
                               SmallSet<Register, 4> &RegsToUpdate,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 82da1a3c305983..7c94add841402a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2408,7 +2408,7 @@ defm SRW  : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                      [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
 defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                       "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
 }
 
 def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
@@ -2423,8 +2423,7 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 let hasSideEffects = 0 in {
 defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
                         "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
-                        SExt32To64;
+                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
 defm CNTLZW : XForm_11r<31,  26, (outs gprc:$RA), (ins gprc:$RST),
                         "cntlzw", "$RA, $RST", IIC_IntGeneral,
                         [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 494e4b52a5b5eb..76b9c19db2b3eb 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1037,6 +1037,7 @@ bool PPCMIPeephole::simplifyCode() {
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
           // sign-extended.
+          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index dfbf412a939212..bcc1d29a3f6ea3 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -604,7 +604,7 @@ body:             |
     %2 = LI 48
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: LI 0
+    ; CHECK: LI8 0
     ; CHECK-LATE: li 3, 0
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 761316ed7726d7..f095ffa85f02db 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1348,7 +1348,7 @@ body:             |
     %1 = LI 77
     %2 = ADDI killed %1, 44
     %3 = EXTSW_32_64 killed %2
-    ; CHECK: LI 121
+    ; CHECK: LI8 121
     ; CHECK-LATE: li 3, 121
     $x3 = COPY %3
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -3573,7 +3573,7 @@ body:             |
 
     %0 = LI 777
     %1 = ORI %0, 88
-    ; CHECK: LI 857
+    ; CHECK: LI8 857
     ; CHECK-LATE: li 3, 857
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4145,7 +4145,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 17
     %4 = RLWINM killed %2, 4, 20, 27
-    ; CHECK: LI 272
+    ; CHECK: LI8 272
     ; CHECK-LATE: li 3, 272
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -6456,7 +6456,7 @@ body:             |
 
     %0 = LI 871
     %1 = XORI %0, 17
-    ; CHECK: LI 886
+    ; CHECK: LI8 886
     ; CHECK-LATE: li 3, 886
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
new file mode 100644
index 00000000000000..1b54ba7a38b816
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -0,0 +1,698 @@
+# RUN: llc -run-pass=ppc-mi-peepholes  -mtriple powerpc64-ibm-aix-xcoff %s -o - \
+# RUN:   -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = '71030_tmp_reduce-O2.ll'
+  source_filename = "71030_tmp_reduce.c"
+  target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+  target triple = "powerpc64-ibm-aix-xcoff"
+  
+  @globalShortValue = local_unnamed_addr global i16 1, align 2
+  @globalCharValue = local_unnamed_addr global i8 0, align 1
+  @largeNumber = local_unnamed_addr global i64 -3664682556119382352, align 8
+  @someIntValue = local_unnamed_addr global i32 378441747, align 4
+  @unitIncrement = local_unnamed_addr global i32 1, align 4
+  @computedResultUll = local_unnamed_addr global i64 0, align 8
+  @computedResultShort = local_unnamed_addr global i16 0, align 2
+  @computedResultUChar = local_unnamed_addr global i8 0, align 1
+  @computedResultBool = local_unnamed_addr global i8 0, align 1
+  @computedResultChar = local_unnamed_addr global i8 0, align 1
+  @shortArray = local_unnamed_addr global [8 x i16] zeroinitializer, align 2
+  @charArray = local_unnamed_addr global [8 x [8 x [8 x i8]]] zeroinitializer, align 1
+  @longArray = local_unnamed_addr global [8 x [8 x i64]] zeroinitializer, align 8
+  @resultArray = local_unnamed_addr global [8 x [8 x i16]] zeroinitializer, align 2
+  @ullArray = local_unnamed_addr global [8 x i64] zeroinitializer, align 8
+  @intArray = local_unnamed_addr global [8 x [8 x [8 x i32]]] zeroinitializer, align 4
+  @_MergedGlobals = private constant <{ [29 x i8], [46 x i8] }> <{ [29 x i8] c"Computed Result (ULL): %llx\0A\00", [46 x i8] c"Computed convert largeNumber&&&& (ULL): %llx\0A\00" }>, align 1
+  
+  @.str.1 = private alias [29 x i8], ptr @_MergedGlobals
+  @.str = private alias [46 x i8], getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1)
+  
+  ; Function Attrs: nofree nounwind
+  define noundef signext i32 @main() local_unnamed_addr #0 {
+  entry:
+    store i16 -1, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %0 = load i64, ptr @largeNumber, align 8, !tbaa !7
+    %conv = trunc i64 %0 to i32
+    %sext = shl i32 %conv, 16
+    %conv1 = ashr exact i32 %sext, 16
+    %sub = add nsw i32 %conv1, -1705
+    %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1), i32 noundef signext %sub)
+    %1 = load i16, ptr @globalShortValue, align 2, !tbaa !3
+    %2 = load i32, ptr @someIntValue, align 4, !tbaa !9
+    %3 = trunc i32 %2 to i8
+    %conv20 = add i8 %3, -19
+    %4 = load i32, ptr @unitIncrement, align 4
+    %5 = load i8, ptr @globalCharValue, align 1
+    %conv45 = sext i8 %5 to i32
+    %computedResultShort.promoted = load i16, ptr @computedResultShort, align 2, !tbaa !3
+    %resultArray.promoted = load i16, ptr @resultArray, align 2, !tbaa !3
+    %computedResultChar.promoted149 = load i8, ptr @computedResultChar, align 1, !tbaa !11
+    %6 = sext i8 %conv20 to i64
+    %7 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %8 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2), align 2
+    %conv46 = sext i16 %8 to i32
+    %cond54 = tail call i32 @llvm.smin.i32(i32 %conv45, i32 %conv46)
+    %tobool = icmp ne i32 %cond54, 0
+    %conv55 = zext i1 %tobool to i8
+    %9 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @ullArray, i64 0, i64 3), align 8
+    %tobool72 = icmp ne i64 %9, 0
+    %frombool = zext i1 %tobool72 to i8
+    %smax = tail call i64 @llvm.smax.i64(i64 %6, i64 4)
+    %10 = add nuw nsw i64 %smax, 3
+    %11 = sub i64 %10, %6
+    %12 = lshr i64 %11, 2
+    %13 = add nuw nsw i64 %12, 1
+    %n.vec = and i64 %13, 9223372036854775806
+    %14 = shl i64 %n.vec, 2
+    %ind.end = add i64 %14, %6
+    %15 = shl i64 %6, 2
+    %16 = shl i64 %6, 3
+    %17 = add nsw i64 %16, -64
+    %scevgep30 = getelementptr i8, ptr @longArray, i64 %17
+    %18 = add nsw i64 %15, 64
+    %scevgep31 = getelementptr i8, ptr @intArray, i64 %18
+    %19 = lshr i64 %13, 1
+    %20 = shl nuw nsw i64 %19, 1
+    %21 = add nsw i64 %20, -2
+    %22 = lshr i64 %21, 1
+    %23 = add nuw i64 %22, 1
+    br label %for.body16
+  
+  for.cond.cleanup15:                               ; preds = %for.cond.cleanup25
+    %24 = tail call i16 @llvm.smin.i16(i16 %1, i16 %7)
+    %conv11.le = sext i16 %24 to i64
+    store i64 %conv11.le, ptr @computedResultUll, align 8, !tbaa !7
+    %call97 = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @_MergedGlobals, i64 noundef %conv11.le)
+    ret i32 0
+  
+  for.body16:                                       ; preds = %for.cond.cleanup25, %entry
+    %lsr.iv29 = phi i32 [ %lsr.iv.next, %for.cond.cleanup25 ], [ 8, %entry ]
+    %conv36.lcssa132140 = phi i16 [ %computedResultShort.promoted, %entry ], [ %conv36.lcssa131, %for.cond.cleanup25 ]
+    %and.lcssa135139 = phi i16 [ %resultArray.promoted, %entry ], [ %and.lcssa134, %for.cond.cleanup25 ]
+    %conv81118.lcssa.lcssa137138 = phi i8 [ %computedResultChar.promoted149, %entry ], [ %conv81118.lcssa.lcssa136, %for.cond.cleanup25 ]
+    %25 = icmp slt i8 %conv20, 8
+    br i1 %25, label %for.body31.lr.ph, label %for.cond.cleanup25
+  
+  for.body31.lr.ph:                                 ; preds = %for.body16
+    %26 = icmp ult i64 %11, 4
+    store i8 %conv55, ptr @computedResultUChar, align 1, !tbaa !11
+    br i1 %26, label %for.body31.preheader, label %vector.body.preheader
+  
+  vector.body.preheader:                            ; preds = %for.body31.lr.ph
+    call void @llvm.set.loop.iterations.i64(i64 %23)
+    br label %vector.body
+  
+  vector.body:                                      ; preds = %vector.body.preheader, %vector.body
+    %vec.phi = phi i16 [ %44, %vector.body ], [ %conv36.lcssa132140, %vector.body.preheader ]
+    %vec.phi159 = phi i16 [ %45, %vector.body ], [ 0, %vector.body.preheader ]
+    %vec.phi160 = phi i16 [ %46, %vector.body ], [ %and.lcssa135139, %vector.body.preheader ]
+    %vec.phi161 = phi i16 [ %47, %vector.body ], [ -1, %vector.body.preheader ]
+    %vec.phi162 = phi i8 [ %48, %vector.body ], [ %conv81118.lcssa.lcssa137138, %vector.body.preheader ]
+    %vec.phi163 = phi i8 [ %49, %vector.body ], [ 0, %vector.body.preheader ]
+    %27 = phi ptr [ %scevgep30, %vector.body.preheader ], [ %31, %vector.body ]
+    %28 = phi ptr [ %scevgep31, %vector.body.preheader ], [ %29, %vector.body ]
+    %29 = getelementptr i8, ptr %28, i64 32
+    %30 = getelementptr i8, ptr %29, i64 16
+    %31 = getelementptr i8, ptr %27, i64 64
+    %32 = getelementptr i8, ptr %31, i64 32
+    %33 = trunc i32 %4 to i16
+    %34 = load i64, ptr %31, align 8, !tbaa !7
+    %35 = load i64, ptr %32, align 8, !tbaa !7
+    %36 = trunc i64 %34 to i16
+    %37 = trunc i64 %35 to i16
+    %38 = load i32, ptr %29, align 4, !tbaa !9
+    %39 = load i32, ptr %30, align 4, !tbaa !9
+    %40 = trunc i32 %38 to i8
+    %41 = trunc i32 %39 to i8
+    %42 = mul i8 %40, -6
+    %43 = mul i8 %41, -6
+    %44 = sub i16 %vec.phi, %33
+    %45 = sub i16 %vec.phi159, %33
+    %46 = and i16 %vec.phi160, %36
+    %47 = and i16 %vec.phi161, %37
+    %48 = add i8 %42, %vec.phi162
+    %49 = add i8 %43, %vec.phi163
+    %50 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %50, label %vector.body, label %middle.block, !llvm.loop !12
+  
+  middle.block:                                     ; preds = %vector.body
+    %51 = icmp eq i64 %13, %n.vec
+    %bin.rdx = add i16 %45, %44
+    %bin.rdx164 = and i16 %47, %46
+    %bin.rdx165 = add i8 %49, %48
+    br i1 %51, label %for.cond21.for.cond.cleanup25_crit_edge, label %for.body31.preheader
+  
+  for.body31.preheader:                             ; preds = %middle.block, %for.body31.lr.ph
+    %indvars.iv.ph = phi i64 [ %6, %for.body31.lr.ph ], [ %ind.end, %middle.block ]
+    %conv36121128.ph = phi i16 [ %conv36.lcssa132140, %for.body31.lr.ph ], [ %bin.rdx, %middle.block ]
+    %and122127.ph = phi i16 [ %and.lcssa135139, %for.body31.lr.ph ], [ %bin.rdx164, %middle.block ]
+    %conv81118.lcssa124126.ph = phi i8 [ %conv81118.lcssa.lcssa137138, %for.body31.lr.ph ], [ %bin.rdx165, %middle.block ]
+    %52 = shl i64 %indvars.iv.ph, 2
+    %53 = shl i64 %indvars.iv.ph, 3
+    %scevgep = getelementptr i8, ptr getelementptr ([8 x [8 x i64]], ptr @longArray, i64 -1, i64 7, i64 4), i64 %53
+    %scevgep32 = getelementptr i8, ptr getelementptr inbounds ([8 x [8 x [8 x i32]]], ptr @intArray, i64 0, i64 0, i64 2, i64 4), i64 %52
+    %smax33 = call i64 @llvm.smax.i64(i64 %indvars.iv.ph, i64 4)
+    %54 = add i64 %smax33, 3
+    %55 = sub i64 %54, %indvars.iv.ph
+    %56 = lshr i64 %55, 2
+    %57 = add nuw nsw i64 %56, 1
+    call void @llvm.set.loop.iterations.i64(i64 %57)
+    br label %for.body31
+  
+  for.cond21.for.cond.cleanup25_crit_edge:          ; preds = %for.body31, %middle.block
+    %conv36.lcssa = phi i16 [ %bin.rdx, %middle.block ], [ %conv36, %for.body31 ]
+    %and.lcssa = phi i16 [ %bin.rdx164, %middle.block ], [ %and, %for.body31 ]
+    %.lcssa = phi i8 [ %bin.rdx165, %middle.block ], [ %67, %for.body31 ]
+    %58 = trunc i16 %1 to i8
+    store i16 %conv36.lcssa, ptr @computedResultShort, align 2, !tbaa !3
+    store i8 %58, ptr getelementptr inbounds ([8 x [8 x [8 x i8]]], ptr @charArray, i64 0, i64 2, i64 0, i64 3), align 1, !tbaa !11
+    store i16 %and.lcssa, ptr @resultArray, align 2, !tbaa !3
+    store i8 %frombool, ptr @computedResultBool, align 1, !tbaa !16
+    store i8 %.lcssa, ptr @computedResultChar, align...
[truncated]

amy-kwan

Initial review, and I plan to review again.

amy-kwan · 2024-03-19T18:56:37Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsRelplaceIntr = false;


Suggested change

bool IsRelplaceIntr = false;

bool IsReplaceInstr = false;

Do we mean this?

amy-kwan · 2024-03-19T18:59:15Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    DenseMap<unsigned, Register> PromoteRegs;
+    DenseMap<unsigned, Register> ReCalRegs;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
+      MachineOperand &Oprand = MI->getOperand(i);


Suggested change

MachineOperand &Oprand = MI->getOperand(i);

MachineOperand &Operand = MI->getOperand(i);

I think we can use Operand instead of Oprand here and below.

amy-kwan · 2024-03-20T04:49:09Z

llvm/lib/Target/PowerPC/PPCInstrInfo.td

@@ -2408,7 +2408,7 @@ defm SRW  : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                     [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
 defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                      "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;


May I ask why SExt32To64 is removed from just SRAW and SRAWI?

there is no 64bit pseudo-code instruction for SRAW and SRAWI , that there is no SRAW8 and SRAWI8 pseudo-code instruction

Group code review comment:
Is the removal of the SExt32To64 flag truly correct? We're not sure if this instruction really makes sense in 32-bit mode. The sign extend flag should depend on the behaviour of the instruction itself (if the instruction is meant to sign extend, then presumably the flag should still be present).

It would be good investigate this.

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

nemanjai · 2024-04-04T18:09:13Z

I think it would be worthwhile investigating whether this can be accomplished significantly more easily and succinctly by implementing another InstrMapping similar to the record-form ones we have (getRecordFormOpcode, getNonRecordFormOpcode). If we have a pair of mappings similar to those but that represent get64BitMnemonic and get32BitMnemonic, we should be able to just use those rather than a big switch.

diggerlin · 2024-04-09T20:51:09Z

I think it would be worthwhile investigating whether this can be accomplished significantly more easily and succinctly by implementing another InstrMapping similar to the record-form ones we have (getRecordFormOpcode, getNonRecordFormOpcode). If we have a pair of mappings similar to those but that represent get64BitMnemonic and get32BitMnemonic, we should be able to just use those rather than a big switch.

I added a

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
...
}

amy-kwan

It would good to update the title and description, rather than "fix a bug".

amy-kwan · 2024-04-24T18:34:26Z

llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir

@@ -0,0 +1,702 @@
+# RUN: llc -run-pass=ppc-mi-peepholes  -mtriple powerpc64-ibm-aix-xcoff %s -o - \


Could we commit this test case first before this patch? Then, it will be easier to see how this patch changes the test case.

amy-kwan · 2024-04-24T18:52:20Z

llvm/lib/Target/PowerPC/PPCInstrInfo.td

@@ -2408,7 +2408,7 @@ defm SRW  : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                     [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
 defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                      "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;


Group code review comment:
Is the removal of the SExt32To64 flag truly correct? We're not sure if this instruction really makes sense in 32-bit mode. The sign extend flag should depend on the behaviour of the instruction itself (if the instruction is meant to sign extend, then presumably the flag should still be present).

It would be good investigate this.

llvm/lib/Target/PowerPC/PPC.td

amy-kwan · 2024-04-24T18:58:49Z

llvm/lib/Target/PowerPC/PPC.td

+  let RowFields = ["Inst"];
+  // Instructions with the same Interpretation64Bit value form a column.
+  let ColFields = ["Interpretation64Bit"];
+    // The key column are not the Interpretation64Bit-form instructions.


Suggested change

// The key column are not the Interpretation64Bit-form instructions.

// The key column are not the Interpretation64Bit-form instructions.

Add pre-commit MIR test for PR "[Promote Pseudo Opcode from 32-bit to 64-bit after eliminating the extsw instruction in PPCMIPeepholes optimization](#85451)" which fixes bug reported in the issue "[Inconsistent Output at -O1 and -O2 Optimization Levels on PowerPC64 Due to Complex Type Casting and Nested Loop Structure](#71030)".

…w instruction in PPCMIPeepholes optimization

amy-kwan

Some group code review comments.

amy-kwan · 2024-05-01T18:33:11Z

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

@@ -1051,6 +1051,7 @@ bool PPCMIPeephole::simplifyCode() {
                   TII->isSignExtended(NarrowReg, MRI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended.
+          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);


A couple comments regarding this line:

This call does not make sense with the comment on 1052-1053. It would be better to put this call above the comment.

The naming of the function seems unclear. We can consider renaming the function from replaceInstrAfter to promoteInstrBefore, or something similar.

Add new documentation for the function.

amy-kwan · 2024-05-01T18:35:08Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

@@ -5234,6 +5234,218 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // We limit the max depth to track incoming values of PHIs or binary ops
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
+
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,


Please add a comment to describe this function.

amy-kwan · 2024-05-01T18:53:14Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {


It would be better to use < instead of != here.

amy-kwan · 2024-05-01T19:01:22Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsReplaceInstr = false;


Can we update the name of this variable? Something like IsPromotedInstr?

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Co-authored-by: Amy Kwan <[email protected]>

…omoteInstr32To64ForElimEXTSW

llvm/lib/Target/PowerPC/PPC.td

lei137 · 2024-10-10T17:55:27Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  default:
+    break;


Not needed since it doesn't do anything.

lei137 · 2024-10-10T18:04:22Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+// extended within the function PPCInstrInfo::isSignOrZeroExtended.
+// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
+// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
+// incremented  only when `promoteInstr32To64ForElimEXTSW` calls itself more


From what I can see below, BinOpDepth is only ever used for AND8 and PHI instr processing and only ever incremented when processing a PHI. Maybe I am missing something or this documentation need to be updated to be more clear. Do we not care to track how many times it calls itself for other instructions?

Good Catch for AND8 . the logic of BinOpDepth is come from function isSignOrZeroExtended(), when the instructions(e.g., AND, PHI, OR) which has two or more source registers , the instructions which defines these source registers also need to be promoted and so on , without increase the BinOpDepth , it maybe have 2 * 2 * 2 * 2.... recursions of promoteInstr32To64ForElimEXTSW.

lei137 · 2024-10-10T18:07:19Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    [[fallthrough]];
+  case PPC::XORIS:
+    CheckAndSetNewOpcode(PPC::XORIS8);
+    [[fallthrough]];


This big switch stmt makes things a bit confusing. See comment above about replacing the lambda func CheckAndSetNewOpcode with a static helper function.

lei137 · 2024-10-10T18:21:32Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsNonSignedExtInstrNeedPromoted = false;


instead of having a flag here, maybe we can setup a static function instead to replace the lambda CheckAndSetNewOpcod

// switch stmt to check opcode and set to new if needed and return true or false static bool SignedExtInstrPromotion(unsigned int &Opcode) { switch (Opcode) { default: return false; case .... return true; }

instead of using static helper function and I do not want a duplication code, I use a Macro definition to achieve it.

IMHO the macro seem more confusing to me.

OK, I change to another way

lei137 · 2024-10-10T18:23:45Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  if (TII->isSExt32To64(Opcode) || IsNonSignedExtInstrNeedPromoted) {


opt for early exit

if (! TII->isSExt32To64(Opcode) && ! IsNonSignedExtInstrNeedPromoted) return;

lei137 · 2024-10-10T18:26:06Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    DenseMap<unsigned, Register> PromoteRegs;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
+      MachineOperand &Operand = MI->getOperand(i);
+      if (Operand.isReg()) {


early exit

Suggested change

if (Operand.isReg()) {

if (!Operand.isReg())

continue;

lei137 · 2024-10-10T18:27:09Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+
+    for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
+      MachineOperand &Operand = Iter->getOperand(i);
+      if (Operand.isReg()) {


opt for early exit

lei137

Please try to keep your title with 80 chars and do not use hypertext in your PR discription.

nit: "Summary" is also not needed in the description as that is implied.

lei137 · 2024-10-15T18:27:11Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  case PPC::ISEL:
+  case PPC::OR8:
+  case PPC::PHI:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {


early exit

Suggested change

if (BinOpDepth < MAX_BINOP_DEPTH) {

if (BinOpDepth >= MAX_BINOP_DEPTH)

break;

lei137 · 2024-10-15T18:28:47Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    break;
+  }
+  case PPC::AND:
+  case PPC::AND8: {


brace not needed here.

lei137 · 2024-10-15T18:29:22Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+      Register SrcReg1 = MI->getOperand(1).getReg();
+      promoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth + 1, LV);
+      Register SrcReg2 = MI->getOperand(2).getReg();
+      promoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth + 1, LV);


tmp var SrcReg[12] is not needed. Prefer to inline it into the call.

lei137 · 2024-10-15T18:42:36Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    Register SrcReg = MI->getOperand(1).getReg();
+    promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);


no need for tmp var. Can inline into call.

lei137 · 2024-10-15T18:45:38Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsNonSignedExtInstrNeedPromoted = false;


This var is a bit of a mouth full and took me a few tries to understand what it means. Maybe a simpler var name: PromoteNonSignedExtInstr?

I think it maybe better to change to HasNonSignedExtInstrPromoted

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

lei137 · 2024-10-15T19:02:08Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsNonSignedExtInstrNeedPromoted = false;


IMHO the macro seem more confusing to me.

lei137 · 2024-10-22T19:27:40Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+  if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
+    return;


Is Reg updated anywhere above? I don't see seem to see any updates so am a bit confused why this check is here vs at the top. Can we add a comment here as to why we are exiting earlier?

nit: remove empty line 5350

we can not move the checking to top, because even the RegClass of the reg is PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass , we still need to check whether we need to logic in the switch statement

for example, COPY from PPC::GPRCRegClass to PPC::G8RCRegClass, we still need to do promoteInstr32To64ForElimEXTSW

lei137 · 2024-10-22T19:31:36Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  // Check if the Opcode is in the map.
+  auto It = OpcodeMap.find(Opcode);
+  if (It != OpcodeMap.end()) {
+    // Set the new opcode to the mapped 64-bit version.
+    NewOpcode = It->second;
+    HasNonSignedExtInstrPromoted = true;
+  }


Since this is not neeed until line 5346, why not move it down there and combine with code on line 5358. That way probably don't even need HasNonSignedExtInstrPromoted.

lei137 · 2024-10-22T19:33:11Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  std::unordered_map<unsigned, unsigned> OpcodeMap = {
+      {PPC::OR, PPC::OR8},     {PPC::ISEL, PPC::ISEL8},
+      {PPC::ORI, PPC::ORI8},   {PPC::XORI, PPC::XORI8},
+      {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
+      {PPC::AND, PPC::AND8}};


Can we document what this list is? As in why this list and how to determine what should be included?

lei137 · 2024-10-22T19:35:58Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+      {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
+      {PPC::AND, PPC::AND8}};
+
+  // Check if the Opcode is in the map.


This comment just document exactly what it being coded below. Please explain what the intent is.
Maybe something to the extent of "Return 64bit equivalent opcodes for ....."

I add the comment in above

// Map the opcode of instructions (which are not sign- or zero-extended

// themselves,but have operands that are destination registers of sign- or

// zero-extended instructions) to their 64-bit equivalents.

I think it is clear enough to explain it, I delete the comment here.

lei137 · 2024-10-23T19:13:52Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    return;
+
+  unsigned Opcode = MI->getOpcode();
+


nit: remove empty line

lei137 · 2024-10-23T19:26:14Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  auto It = OpcodeMap.find(Opcode);
+  if (It != OpcodeMap.end()) {
+    // Set the new opcode to the mapped 64-bit version.
+    NewOpcode = It->second;
+    HasNonSignedExtInstrPromoted = true;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  if (!TII->isSExt32To64(Opcode) && !HasNonSignedExtInstrPromoted)
+    return;
+
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
+    return;
+
+  // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
+  // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
+  // instruction with the same opcode.
+  if (!HasNonSignedExtInstrPromoted)
+    NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
+


I don't see a real need for this boolean flag HasNonSignedExtInstrPromoted. I feel if we simplify it thus the intention is more clear.

Suggested change

auto It = OpcodeMap.find(Opcode);

if (It != OpcodeMap.end()) {

// Set the new opcode to the mapped 64-bit version.

NewOpcode = It->second;

HasNonSignedExtInstrPromoted = true;

}

const PPCInstrInfo *TII =

MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();

if (!TII->isSExt32To64(Opcode) && !HasNonSignedExtInstrPromoted)

return;

const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)

return;

// The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to

// map the 32-bit instruction with the `SExt32To64` flag to the 64-bit

// instruction with the same opcode.

if (!HasNonSignedExtInstrPromoted)

NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

int NewOpcode;

auto It = OpcodeMap.find(Opcode);

// Promote to 64bit instruction.

if (It != OpcodeMap.end())

NewOpcode = It->second;

else {

if (!MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo()->isSExt32To64(Opcode))

return;

// TableGen function `get64BitInstrFromSignedExt32BitInstr` can be used to

// obtain the equivalent 64bit instructions for 32bit instructions tagged with the `SExt32To64` flag.

NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

}

# please doc why we want to return here for R0?

const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)

return;

lei137 · 2024-10-23T19:38:27Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  // Map the opcode of instructions (which are not sign- or zero-extended
+  // themselves,but have operands that are destination registers of sign- or
+  // zero-extended instructions) to their 64-bit equivalents.


Suggested change

// Map the opcode of instructions (which are not sign- or zero-extended

// themselves,but have operands that are destination registers of sign- or

// zero-extended instructions) to their 64-bit equivalents.

// Map the 32bit to 64bit opcodes for instructions that are not signed or zero extended

// themselves, but may have operands who's destination registers are of signed or

// zero extended instructions.

lei137 · 2024-10-23T19:40:27Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+    }
+  }
+
+    Register NewDefinedReg = MRI->createVirtualRegister(NewRC);


I think the indentation from here to return is a bit off.

lei137

This LGTM. Thanks for addressing all the comments.

lei137 · 2024-10-24T20:15:14Z

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+  int NewOpcode = -1;
+


nit: maybe move this down to where it's being assigned... line 5345.

…Peephole (llvm#85451) Fixes: llvm#71030 Bug only happens in 64bit involving spills. Since we don't know when the spill will happen, all instructions in the chain used to deduce sign extension for eliminating 'extsw' will need to be promoted to 64-bit pseudo instructions. The following instruction will promoted in PPCMIPeepholes: EXTSH, LHA, ISEL to EXTSH8, LHA8, ISEL8

llvmbot added the backend:PowerPC label Mar 15, 2024

diggerlin changed the title ~~first implement of fixing issue 71030~~ fix a bug of PPCMIPeepholes which description in issue 71030 Mar 15, 2024

diggerlin requested review from nemanjai, stefanp-synopsys, amy-kwan, lei137 and mandlebug March 15, 2024 19:17

diggerlin force-pushed the digger/issue-71030 branch from 54fad33 to 680bcf6 Compare March 19, 2024 15:47

amy-kwan reviewed Mar 20, 2024

View reviewed changes

diggerlin requested review from amy-kwan and chenzheng1030 April 4, 2024 15:35

amy-kwan reviewed Apr 24, 2024

View reviewed changes

diggerlin changed the title ~~fix a bug of PPCMIPeepholes which description in issue 71030~~ promote PseduoCode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization Apr 24, 2024

diggerlin changed the title ~~promote PseduoCode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization~~ promote Pseduo Opcode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization Apr 25, 2024

diggerlin force-pushed the digger/issue-71030 branch from dc3948a to 217a293 Compare April 30, 2024 18:35

diggerlin mentioned this pull request Apr 30, 2024

NFC add a new precommit test case for PPCMIpeephole #90656

Merged

promote Pseduo Opcode from 32bit to 64bits after eliminating the exts…

79aaf13

…w instruction in PPCMIPeepholes optimization

diggerlin force-pushed the digger/issue-71030 branch from 217a293 to 79aaf13 Compare April 30, 2024 21:29

diggerlin added 3 commits May 1, 2024 09:02

add srawi8

d4b4abc

add SRAWI8 record

affb9bf

delete select pattern for SRAW8

75494c3

amy-kwan reviewed May 1, 2024

View reviewed changes

address comment

489cccc

diggerlin added 2 commits May 2, 2024 10:03

change function name and variable name

1ece68c

add more comment

56b04f5

diggerlin and others added 2 commits May 28, 2024 15:48

Update llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

9b0b9c8

Co-authored-by: Amy Kwan <[email protected]>

address comment

8550317

diggerlin requested review from amy-kwan and kamaub May 28, 2024 20:13

diggerlin added 2 commits June 13, 2024 11:28

minor: change function name from PromoteInstr32To64ForElimEXTSW to pr…

9a4e4d0

…omoteInstr32To64ForElimEXTSW

add not Reg.isVirtual() check

3798b03

diggerlin changed the title ~~Promote pseudo opcodes from 32-bit to 64-bit for instructions that infer extsw elimination in PPCMIPeepholes optimization~~ Promote pseudo opcodes from 32-bit to 64-bit for instructions that infer extsw elimination in PPCMIPeepholes pass Oct 10, 2024

lei137 reviewed Oct 10, 2024

View reviewed changes

diggerlin requested a review from lei137 October 11, 2024 15:56

address comment

34b52bb

lei137 reviewed Oct 15, 2024

View reviewed changes

diggerlin changed the title ~~Promote pseudo opcodes from 32-bit to 64-bit for instructions that infer extsw elimination in PPCMIPeepholes pass~~ Promote pseudo instructions from 32-bit to 64-bit for inferring extsw removal in PPCMIPeepholes Oct 17, 2024

diggerlin requested a review from lei137 October 17, 2024 18:35

address comment

f0cdc75

diggerlin changed the title ~~Promote pseudo instructions from 32-bit to 64-bit for inferring extsw removal in PPCMIPeepholes~~ Promote 32bit pseudo instr that infer extsw removal to 64bit in PPCMIPeephole Oct 22, 2024

lei137 reviewed Oct 22, 2024

View reviewed changes

diggerlin requested a review from lei137 October 23, 2024 18:07

address comment

1de4b40

lei137 reviewed Oct 23, 2024

View reviewed changes

diggerlin requested a review from lei137 October 24, 2024 13:54

diggerlin added 2 commits October 24, 2024 14:05

address comment

9ae473b

reformate indentation

910b1b5

lei137 approved these changes Oct 24, 2024

View reviewed changes

This comment was marked as duplicate.

Sign in to view

address minor comment

095acfc

diggerlin merged commit 674574d into llvm:main Oct 31, 2024
4 of 7 checks passed

	MachineOperand &Oprand = MI->getOperand(i);
	MachineOperand &Operand = MI->getOperand(i);

		@@ -0,0 +1,702 @@
		# RUN: llc -run-pass=ppc-mi-peepholes -mtriple powerpc64-ibm-aix-xcoff %s -o - \

	// The key column are not the Interpretation64Bit-form instructions.
	// The key column are not the Interpretation64Bit-form instructions.

	if (BinOpDepth < MAX_BINOP_DEPTH) {
	if (BinOpDepth >= MAX_BINOP_DEPTH)
	break;

Promote 32bit pseudo instr that infer extsw removal to 64bit in PPCMIPeephole #85451

Promote 32bit pseudo instr that infer extsw removal to 64bit in PPCMIPeephole #85451

Uh oh!

Conversation

diggerlin commented Mar 15, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Mar 15, 2024

Uh oh!

amy-kwan left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

nemanjai commented Apr 4, 2024

Uh oh!

diggerlin commented Apr 9, 2024

Uh oh!

amy-kwan left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

amy-kwan left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

diggerlin Oct 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

diggerlin Oct 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

lei137 left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

diggerlin commented Mar 15, 2024 •

edited

Loading

diggerlin Oct 11, 2024 •

edited

Loading

diggerlin Oct 11, 2024 •

edited

Loading