llvm · heiher · Jun 3, 2024 · May 22, 2024
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -121,6 +121,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   // Set operations for LA64 only.
 
   if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::ADD, MVT::i32, Custom);
+    setOperationAction(ISD::SUB, MVT::i32, Custom);
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -1723,6 +1725,18 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
 }
 
+// Converts the given 32-bit operation to an i64 operation with sign-extension
+// semantics, to reduce the number of sign-extension instructions.
+static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
+  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
+                               DAG.getValueType(MVT::i32));
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+}
+
 // Helper function that emits error message for intrinsics with/without chain
 // and return a UNDEF or and the chain as the results.
 static void emitErrorAndReplaceIntrinsicResults(
@@ -1846,6 +1860,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to legalize this operation");
+  case ISD::ADD:
+  case ISD::SUB:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
+    break;
   case ISD::UDIV:
   case ISD::UREM:
     assert(VT == MVT::i32 && Subtarget.is64Bit() &&

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -441,7 +441,7 @@ def simm16_lsl16 : Operand<GRLenVT>,
 // A 32-bit signed immediate expressible with a pair of `addu16i.d + addi` for
 // use in additions.
 def simm32_hi16_lo12: Operand<GRLenVT>, ImmLeaf<GRLenVT, [{
-  return isShiftedInt<16, 16>(Imm - SignExtend64<12>(Imm));
+  return !isInt<12>(Imm) && isShiftedInt<16, 16>(Imm - SignExtend64<12>(Imm));
 }]>;
 
 def BareSymbol : AsmOperandClass {
@@ -1106,11 +1106,8 @@ foreach Idx = 1...3 in {
 
 let Predicates = [IsLA64] in {
 def : PatGprGpr<add, ADD_D>;
-def : PatGprGpr_32<add, ADD_W>;
 def : PatGprImm<add, ADDI_D, simm12>;
-def : PatGprImm_32<add, ADDI_W, simm12>;
 def : PatGprGpr<sub, SUB_D>;
-def : PatGprGpr_32<sub, SUB_W>;
 def : PatGprGpr<sdiv, DIV_D>;
 def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;

diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -42,7 +42,7 @@ define i32 @alsl_i32(i32 signext %a, i32 signext %b) nounwind {
 ;
 ; LA64-LABEL: alsl_i32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT:    alsl.w $a0, $a0, $a1, 3
 ; LA64-NEXT:    ret
 entry:
   %mul = mul nsw i32 %a, 8
@@ -178,7 +178,7 @@ define i32 @mul_add_i32(i32 signext %a, i32 signext %b) nounwind {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    alsl.d $a0, $a0, $a2, 2
-; LA64-NEXT:    add.d $a0, $a1, $a0
+; LA64-NEXT:    add.w $a0, $a1, $a0
 ; LA64-NEXT:    ret
 entry:
   %mul = mul nsw i32 %a, 12
@@ -325,7 +325,7 @@ define i32 @alsl_neg_i32(i32 signext %a, i32 signext %b) nounwind {
 ; LA64-LABEL: alsl_neg_i32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    alsl.d $a0, $a0, $a0, 3
-; LA64-NEXT:    sub.d $a0, $a1, $a0
+; LA64-NEXT:    sub.w $a0, $a1, $a0
 ; LA64-NEXT:    ret
 entry:
   %mul = mul nsw i32 %a, -9

diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -112,7 +112,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB2_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    addi.d $a2, $a2, 1
+; LA64-NEXT:    addi.w $a2, $a2, 1
 ; LA64-NEXT:    sltu $a4, $a3, $a1
 ; LA64-NEXT:    xori $a4, $a4, 1
 ; LA64-NEXT:    masknez $a4, $a2, $a4
@@ -298,7 +298,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB6_3 Depth 2
 ; LA64-NEXT:    move $a4, $a2
-; LA64-NEXT:    addi.d $a2, $a2, -1
+; LA64-NEXT:    addi.w $a2, $a2, -1
 ; LA64-NEXT:    sltui $a5, $a4, 1
 ; LA64-NEXT:    sltu $a6, $a3, $a4
 ; LA64-NEXT:    masknez $a2, $a2, $a6

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll
@@ -54,7 +54,7 @@ define i32 @add_i32(i32 %x, i32 %y) {
 ;
 ; LA64-LABEL: add_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    add.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %add = add i32 %x, %y
   ret i32 %add
@@ -144,7 +144,7 @@ define i32 @add_i32_3(i32 %x) {
 ;
 ; LA64-LABEL: add_i32_3:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $a0, $a0, 3
+; LA64-NEXT:    addi.w $a0, $a0, 3
 ; LA64-NEXT:    ret
   %add = add i32 %x, 3
   ret i32 %add
@@ -195,6 +195,7 @@ define i32 @add_i32_0x12340000(i32 %x) {
 ; LA64-LABEL: add_i32_0x12340000:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, 4660
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, 305397760
   ret i32 %add
@@ -244,6 +245,7 @@ define i32 @add_i32_0x7fff0000(i32 %x) {
 ; LA64-LABEL: add_i32_0x7fff0000:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, 32767
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147418112
   ret i32 %add
@@ -293,6 +295,7 @@ define i32 @add_i32_minus_0x80000000(i32 %x) {
 ; LA64-LABEL: add_i32_minus_0x80000000:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, -32768
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, -2147483648
   ret i32 %add
@@ -343,6 +346,7 @@ define i32 @add_i32_minus_0x10000(i32 %x) {
 ; LA64-LABEL: add_i32_minus_0x10000:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, -1
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
   %add = add i32 %x, -65536
   ret i32 %add
@@ -396,7 +400,7 @@ define i32 @add_i32_0x7fff07ff(i32 %x) {
 ; LA64-LABEL: add_i32_0x7fff07ff:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, 32767
-; LA64-NEXT:    addi.d $a0, $a0, 2047
+; LA64-NEXT:    addi.w $a0, $a0, 2047
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147420159
   ret i32 %add
@@ -450,7 +454,7 @@ define i32 @add_i32_0x7ffef800(i32 %x) {
 ; LA64-LABEL: add_i32_0x7ffef800:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, 32767
-; LA64-NEXT:    addi.d $a0, $a0, -2048
+; LA64-NEXT:    addi.w $a0, $a0, -2048
 ; LA64-NEXT:    ret
   %add = add i32 %x, 2147416064
   ret i32 %add
@@ -525,7 +529,7 @@ define i32 @add_i32_minus_0x23450679(i32 %x) {
 ; LA64-LABEL: add_i32_minus_0x23450679:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, -9029
-; LA64-NEXT:    addi.d $a0, $a0, -1657
+; LA64-NEXT:    addi.w $a0, $a0, -1657
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591726201
   ret i32 %add
@@ -580,7 +584,7 @@ define i32 @add_i32_minus_0x2345fedd(i32 %x) {
 ; LA64-LABEL: add_i32_minus_0x2345fedd:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addu16i.d $a0, $a0, -9030
-; LA64-NEXT:    addi.d $a0, $a0, 291
+; LA64-NEXT:    addi.w $a0, $a0, 291
 ; LA64-NEXT:    ret
   %add = add i32 %x, -591789789
   ret i32 %add
@@ -680,7 +684,7 @@ define i32 @add_i32_minus_0x80000800(i32 %x) {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    lu12i.w $a1, 524287
 ; LA64-NEXT:    ori $a1, $a1, 2048
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    add.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %add = add i32 %x, -2147485696
   ret i32 %add

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
@@ -444,7 +444,7 @@ define i32 @and_add_lsr(i32 %x, i32 %y) {
 ;
 ; LA64-LABEL: and_add_lsr:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $a0, $a0, -1
+; LA64-NEXT:    addi.w $a0, $a0, -1
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 20
 ; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    ret

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
@@ -34,7 +34,7 @@ define i32 @defined_function(i32 %a) nounwind {
 ;
 ; LA64-LABEL: defined_function:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $a0, $a0, 1
+; LA64-NEXT:    addi.w $a0, $a0, 1
 ; LA64-NEXT:    ret
   %1 = add i32 %a, 1
   ret i32 %1

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -32,7 +32,7 @@ define i32 @load_store_global() nounwind {
 ; LA64NOPIC-NEXT:    pcalau12i $a0, %pc_hi20(G)
 ; LA64NOPIC-NEXT:    addi.d $a1, $a0, %pc_lo12(G)
 ; LA64NOPIC-NEXT:    ld.w $a0, $a1, 0
-; LA64NOPIC-NEXT:    addi.d $a0, $a0, 1
+; LA64NOPIC-NEXT:    addi.w $a0, $a0, 1
 ; LA64NOPIC-NEXT:    st.w $a0, $a1, 0
 ; LA64NOPIC-NEXT:    ret
 ;
@@ -41,7 +41,7 @@ define i32 @load_store_global() nounwind {
 ; LA64PIC-NEXT:    pcalau12i $a0, %pc_hi20(.LG$local)
 ; LA64PIC-NEXT:    addi.d $a1, $a0, %pc_lo12(.LG$local)
 ; LA64PIC-NEXT:    ld.w $a0, $a1, 0
-; LA64PIC-NEXT:    addi.d $a0, $a0, 1
+; LA64PIC-NEXT:    addi.w $a0, $a0, 1
 ; LA64PIC-NEXT:    st.w $a0, $a1, 0
 ; LA64PIC-NEXT:    ret
   %v = load i32, ptr @G

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll
@@ -54,7 +54,7 @@ define i32 @sub_i32(i32 %x, i32 %y) {
 ;
 ; LA64-LABEL: sub_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    sub.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %sub = sub i32 %x, %y
   ret i32 %sub

diff --git a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
@@ -198,10 +198,9 @@ define i32 @sub_clamp_zero_i32(i32 signext %x, i32 signext %y) {
 ;
 ; LA64-LABEL: sub_clamp_zero_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a2, $a0, $a1
 ; LA64-NEXT:    sub.w $a0, $a0, $a1
-; LA64-NEXT:    srai.d $a0, $a0, 31
-; LA64-NEXT:    andn $a0, $a2, $a0
+; LA64-NEXT:    srai.d $a1, $a0, 31
+; LA64-NEXT:    andn $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %sub = sub nsw i32 %x, %y
   %cmp = icmp sgt i32 %sub, 0

diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -793,7 +793,9 @@ define signext i32 @test14(i32 signext %0, i32 signext %1) {
 ; NORMV-NEXT:    blt $a4, $a5, .LBB13_5
 ; NORMV-NEXT:  # %bb.3: # in Loop: Header=BB13_2 Depth=1
 ; NORMV-NEXT:    add.d $a0, $a3, $a0
-; NORMV-NEXT:    addi.w $a3, $a3, 1
+; NORMV-NEXT:    addi.d $a3, $a3, 1
+; NORMV-NEXT:    addi.w $a3, $a3, 0
+; NORMV-NEXT:    addi.d $a0, $a0, 0
 ; NORMV-NEXT:    blt $a3, $a1, .LBB13_2
 ; NORMV-NEXT:  .LBB13_4:
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
@@ -861,7 +863,9 @@ define signext i32 @test14b(i32 %0, i32 signext %1) {
 ; NORMV-NEXT:    blt $a4, $a5, .LBB14_5
 ; NORMV-NEXT:  # %bb.3: # in Loop: Header=BB14_2 Depth=1
 ; NORMV-NEXT:    add.d $a0, $a3, $a0
-; NORMV-NEXT:    addi.w $a3, $a3, 1
+; NORMV-NEXT:    addi.d $a3, $a3, 1
+; NORMV-NEXT:    addi.w $a3, $a3, 0
+; NORMV-NEXT:    addi.d $a0, $a0, 0
 ; NORMV-NEXT:    blt $a3, $a1, .LBB14_2
 ; NORMV-NEXT:  .LBB14_4:
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
@@ -929,7 +933,9 @@ define signext i32 @test14c(i32 zeroext %0, i32 signext %1) {
 ; NORMV-NEXT:    blt $a4, $a5, .LBB15_5
 ; NORMV-NEXT:  # %bb.3: # in Loop: Header=BB15_2 Depth=1
 ; NORMV-NEXT:    add.d $a0, $a3, $a0
-; NORMV-NEXT:    addi.w $a3, $a3, 1
+; NORMV-NEXT:    addi.d $a3, $a3, 1
+; NORMV-NEXT:    addi.w $a3, $a3, 0
+; NORMV-NEXT:    addi.d $a0, $a0, 0
 ; NORMV-NEXT:    blt $a3, $a1, .LBB15_2
 ; NORMV-NEXT:  .LBB15_4:
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
@@ -996,7 +1002,9 @@ define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
 ; NORMV-NEXT:    blt $a4, $a5, .LBB16_5
 ; NORMV-NEXT:  # %bb.3: # in Loop: Header=BB16_2 Depth=1
 ; NORMV-NEXT:    add.d $a0, $a3, $a0
-; NORMV-NEXT:    addi.w $a3, $a3, 1
+; NORMV-NEXT:    addi.d $a3, $a3, 1
+; NORMV-NEXT:    addi.w $a3, $a3, 0
+; NORMV-NEXT:    addi.d $a0, $a0, 0
 ; NORMV-NEXT:    blt $a3, $a1, .LBB16_2
 ; NORMV-NEXT:  .LBB16_4:
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
@@ -1122,8 +1130,8 @@ define signext i32 @bug(i32 signext %x) {
 ; CHECK-NEXT:    masknez $a1, $a1, $a2
 ; CHECK-NEXT:    maskeqz $a2, $a4, $a2
 ; CHECK-NEXT:    or $a1, $a2, $a1
-; CHECK-NEXT:    srai.d $a0, $a0, 31
 ; CHECK-NEXT:    nor $a0, $a0, $zero
+; CHECK-NEXT:    srli.d $a0, $a0, 31
 ; CHECK-NEXT:    add.w $a0, $a1, $a0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB18_2:
@@ -1176,13 +1184,13 @@ define signext i32 @bug(i32 signext %x) {
 ; NORMV-NEXT:    masknez $a1, $a1, $a2
 ; NORMV-NEXT:    maskeqz $a2, $a4, $a2
 ; NORMV-NEXT:    or $a1, $a2, $a1
-; NORMV-NEXT:    srai.d $a0, $a0, 31
 ; NORMV-NEXT:    nor $a0, $a0, $zero
+; NORMV-NEXT:    srli.d $a0, $a0, 31
 ; NORMV-NEXT:    add.d $a0, $a1, $a0
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    ret
 ; NORMV-NEXT:  .LBB18_2:
-; NORMV-NEXT:    addi.w $a0, $zero, 0
+; NORMV-NEXT:    move $a0, $zero
 ; NORMV-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %x, 0
@@ -1345,7 +1353,7 @@ declare zeroext i16 @bat(i32 signext)
 define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 signext %3, i32 signext %4) {
 ; CHECK-LABEL: sextw_sh2add:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    alsl.d $a2, $a2, $a3, 2
+; CHECK-NEXT:    alsl.w $a2, $a2, $a3, 2
 ; CHECK-NEXT:    beqz $a0, .LBB21_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    st.w $a2, $a1, 0
@@ -1355,12 +1363,13 @@ define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 sign
 ;
 ; NORMV-LABEL: sextw_sh2add:
 ; NORMV:       # %bb.0:
-; NORMV-NEXT:    alsl.d $a2, $a2, $a3, 2
+; NORMV-NEXT:    alsl.w $a2, $a2, $a3, 2
 ; NORMV-NEXT:    beqz $a0, .LBB21_2
 ; NORMV-NEXT:  # %bb.1:
 ; NORMV-NEXT:    st.w $a2, $a1, 0
 ; NORMV-NEXT:  .LBB21_2:
-; NORMV-NEXT:    add.w $a0, $a2, $a4
+; NORMV-NEXT:    add.d $a0, $a2, $a4
+; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    ret
   %6 = shl i32 %2, 2
   %7 = add i32 %6, %3