[AArch64][GlobalISel] Select negative arithmetic immediates in manual selector

Jessica Paquette · Jessica Paquette · commit f0580c73bb32 · 2020-11-11T09:20:05.000-08:00
Previously, we only handled negative arithmetic immediates in the imported
selector code.

Since we don't import code for, say, compares, we were missing opportunities
for things like

```
%cst:gpr(s64) = G_CONSTANT i64 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
->
%adds = ADDSXri %reg0, 10, 0, implicit-def $nzcv
%cmp = CSINCWr $wzr, $wzr, 1, implicit $nzcv
```

Instead, we would have to materialize the constant and emit a SUBS.

This adds support for selection like above for SUB, SUBS, ADD, and ADDS.

This is a 0.1% geomean code size improvement on SPECINT2000 at -Os.

Differential Revision: https://reviews.llvm.org/D91108
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -176,26 +176,19 @@ class AArch64InstructionSelector : public InstructionSelector {
                           std::initializer_list<llvm::SrcOp> SrcOps,
                           MachineIRBuilder &MIRBuilder,
                           const ComplexRendererFns &RenderFns = None) const;
-  /// Helper function to emit a binary operation such as an ADD, ADDS, etc.
-  ///
-  /// This is intended for instructions with the following opcode variants:
-  ///
-  ///  - Xri, Wri (arithmetic immediate form)
-  ///  - Xrs, Wrs (shifted register form)
-  ///  - Xrr, Wrr (register form)
-  ///
-  /// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
+  /// Helper function to emit an add or sub instruction.
   ///
-  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants below
   /// in a specific order.
   ///
   /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
   ///
   /// \code
-  ///   const std::array<std::array<unsigned, 2>, 3> Table {
+  ///   const std::array<std::array<unsigned, 2>, 4> Table {
   ///    {{AArch64::ADDXri, AArch64::ADDWri},
   ///     {AArch64::ADDXrs, AArch64::ADDWrs},
-  ///     {AArch64::ADDXrr, AArch64::ADDWrr}}};
+  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
+  ///     {AArch64::SUBXri, AArch64::SUBWri}}};
   /// \endcode
   ///
   /// Each row in the table corresponds to a different addressing mode. Each
@@ -205,6 +198,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   ///   - Row 0: The ri opcode variants
   ///   - Row 1: The rs opcode variants
   ///   - Row 2: The rr opcode variants
+  ///   - Row 3: The ri opcode variants for negative immediates
   ///
   /// \attention Columns must be structured as follows:
   ///   - Column 0: The 64-bit opcode variants
@@ -213,8 +207,8 @@ class AArch64InstructionSelector : public InstructionSelector {
   /// \p Dst is the destination register of the binop to emit.
   /// \p LHS is the left-hand operand of the binop to emit.
   /// \p RHS is the right-hand operand of the binop to emit.
-  MachineInstr *emitBinOp(
-      const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
+  MachineInstr *emitAddSub(
+      const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3826,8 +3820,8 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
   return &*MI;
 }
 
-MachineInstr *AArch64InstructionSelector::emitBinOp(
-    const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
+MachineInstr *AArch64InstructionSelector::emitAddSub(
+    const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
     MachineIRBuilder &MIRBuilder) const {
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3837,9 +3831,18 @@ MachineInstr *AArch64InstructionSelector::emitBinOp(
   unsigned Size = Ty.getSizeInBits();
   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
   bool Is32Bit = Size == 32;
+
+  // INSTRri form with positive arithmetic immediate.
   if (auto Fns = selectArithImmed(RHS))
     return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                      MIRBuilder, Fns);
+
+  // INSTRri form with negative arithmetic immediate.
+  if (auto Fns = selectNegArithImmed(RHS))
+    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
+                     MIRBuilder, Fns);
+
+  // INSTRrs form.
   if (auto Fns = selectShiftedRegister(RHS))
     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                      MIRBuilder, Fns);
@@ -3851,33 +3854,36 @@ MachineInstr *
 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 3> OpcTable{
+  const std::array<std::array<unsigned, 2>, 4> OpcTable{
       {{AArch64::ADDXri, AArch64::ADDWri},
        {AArch64::ADDXrs, AArch64::ADDWrs},
-       {AArch64::ADDXrr, AArch64::ADDWrr}}};
-  return emitBinOp(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+       {AArch64::ADDXrr, AArch64::ADDWrr},
+       {AArch64::SUBXri, AArch64::SUBWri}}};
+  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
 }
 
 MachineInstr *
 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 3> OpcTable{
+  const std::array<std::array<unsigned, 2>, 4> OpcTable{
       {{AArch64::ADDSXri, AArch64::ADDSWri},
        {AArch64::ADDSXrs, AArch64::ADDSWrs},
-       {AArch64::ADDSXrr, AArch64::ADDSWrr}}};
-  return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
+       {AArch64::ADDSXrr, AArch64::ADDSWrr},
+       {AArch64::SUBSXri, AArch64::SUBSWri}}};
+  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
 MachineInstr *
 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 3> OpcTable{
+  const std::array<std::array<unsigned, 2>, 4> OpcTable{
       {{AArch64::SUBSXri, AArch64::SUBSWri},
        {AArch64::SUBSXrs, AArch64::SUBSWrs},
-       {AArch64::SUBSXrr, AArch64::SUBSWrr}}};
-  return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
+       {AArch64::SUBSXrr, AArch64::SUBSWrr},
+       {AArch64::ADDSXri, AArch64::ADDSWri}}};
+  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
 MachineInstr *
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -577,3 +577,29 @@ body:             |
     %select:gpr(s32) = G_SELECT %cmp_trunc(s1), %one, %zero
     $w0 = COPY %select(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmn_s32_neg_imm
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: cmn_s32_neg_imm
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %reg0:gpr32sp = COPY $w0
+    ; CHECK: $wzr = SUBSWri %reg0, 1, 0, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %negative_one:gpr(s32) = G_CONSTANT i32 -1
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %sub:gpr(s32) = G_SUB %zero, %negative_one
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -115,3 +115,70 @@ body:             |
     %5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
     $w0 = COPY %5(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmp_neg_imm_32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: cmp_neg_imm_32
+    ; CHECK: liveins: $w0
+    ; CHECK: %reg0:gpr32sp = COPY $w0
+    ; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %cst:gpr(s32) = G_CONSTANT i32 -10
+    %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmp_neg_imm_64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: cmp_neg_imm_64
+    ; CHECK: liveins: $x0
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s64) = COPY $x0
+    %cst:gpr(s64) = G_CONSTANT i64 -10
+    %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            cmp_neg_imm_invalid
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: cmp_neg_imm_invalid
+    ; CHECK: liveins: $w0
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %cst:gpr32 = MOVi32imm -5000
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %cst:gpr(s32) = G_CONSTANT i32 -5000
+    %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
@@ -72,3 +72,20 @@ body:             |
     %3:fpr(<2 x p0>) = G_PTR_ADD %0, %1(<2 x s64>)
     $q0 = COPY %3(<2 x p0>)
 ...
+---
+name:            ptr_add_neg_imm
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+      liveins: $x0
+    ; CHECK-LABEL: name: ptr_add_neg_imm
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY1]], 10, 0, implicit-def $nzcv
+    ; CHECK: $x0 = COPY [[SUBSXri]]
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 -10
+    %2:gpr(p0) = G_PTR_ADD %0, %1(s64)
+    $x0 = COPY %2(p0)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -111,3 +111,28 @@ body:             |
     %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy1, %shift
     $w0 = COPY %add(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddo_s32_neg_imm
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWri when we can fold in a negative constant.
+    ;
+    ; CHECK-LABEL: name: uaddo_s32_neg_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $w0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 -16
+    %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir
@@ -99,9 +99,8 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %copy:gpr64 = COPY $x0
-  ; CHECK:   %negative_one:gpr64 = MOVi64imm -1
-  ; CHECK:   %and:gpr64common = ANDXri %copy, 8000
-  ; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %and, %negative_one, implicit-def $nzcv
+  ; CHECK:   %and:gpr64sp = ANDXri %copy, 8000
+  ; CHECK:   [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %and, 1, 0, implicit-def $nzcv
   ; CHECK:   Bcc 12, %bb.1, implicit $nzcv
   ; CHECK:   B %bb.0
   ; CHECK: bb.1: