
[LoongArch] Enable FeatureExtLSX for generic-la64 processor #113421


Merged
Ami-zhang merged 1 commit into llvm:main on Oct 31, 2024

Conversation

Ami-zhang
Contributor

This commit makes the generic target support FP and LSX, as discussed in #110211. This allows 128-bit vectors to be enabled by default in the loongarch64 backend.
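For illustration (an example of mine, not part of the patch): with LSX enabled on the default generic-la64 CPU, a plain 128-bit vector add like the one below should now select LSX instructions rather than being scalarized.

; Hypothetical demonstration, not a test from this PR; the exact register
; assignment in the CHECK line is an assumption.
; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
define <4 x i32> @demo_vadd(<4 x i32> %a, <4 x i32> %b) {
; CHECK: vadd.w $vr0, $vr0, $vr1
  %c = add <4 x i32> %a, %b
  ret <4 x i32> %c
}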

@llvmbot
Member

llvmbot commented Oct 23, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-loongarch

Author: None (Ami-zhang)

Changes

This commit makes the generic target support FP and LSX, as discussed in #110211. This allows 128-bit vectors to be enabled by default in the loongarch64 backend.


Patch is 104.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113421.diff

23 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArch.td (+3-1)
  • (modified) llvm/test/CodeGen/LoongArch/calling-conv-common.ll (+8-10)
  • (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll (+9-20)
  • (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll (+11-14)
  • (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+19-70)
  • (modified) llvm/test/CodeGen/LoongArch/double-imm.ll (+2-6)
  • (modified) llvm/test/CodeGen/LoongArch/frame.ll (+2-2)
  • (modified) llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll (+3-16)
  • (modified) llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll (+3-3)
  • (modified) llvm/test/CodeGen/LoongArch/intrinsic-error.ll (+3-3)
  • (modified) llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll (+6-12)
  • (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll (+280-390)
  • (modified) llvm/test/CodeGen/LoongArch/sextw-removal.ll (+75-186)
  • (modified) llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll (+8-20)
  • (modified) llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll (+3-1)
  • (modified) llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll (+2-4)
  • (modified) llvm/test/CodeGen/LoongArch/tail-calls.ll (+2-2)
  • (modified) llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll (+48-8)
  • (modified) llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll (+2-6)
  • (modified) llvm/test/CodeGen/LoongArch/vararg.ll (+4-4)
  • (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+53-137)
  • (modified) llvm/test/Transforms/LoopIdiom/LoongArch/popcnt.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll (+1-1)
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index ddb27dc6404fa8..67ee2fd791bcfb 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -129,7 +129,9 @@ include "LoongArchInstrInfo.td"
 //===----------------------------------------------------------------------===//
 
 def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>;
-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit,
+                                                    FeatureUAL,
+                                                    FeatureExtLSX]>;
 
 // Generic 64-bit processor with double-precision floating-point support.
 def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit,
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
index 06dfe00d908475..5c9575b2baab18 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
@@ -123,13 +123,12 @@ define i64 @caller_large_scalars() nounwind {
 ; CHECK-NEXT:    addi.d $sp, $sp, -80
 ; CHECK-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
 ; CHECK-NEXT:    st.d $zero, $sp, 24
-; CHECK-NEXT:    st.d $zero, $sp, 16
-; CHECK-NEXT:    st.d $zero, $sp, 8
+; CHECK-NEXT:    vrepli.b $vr0, 0
+; CHECK-NEXT:    vst $vr0, $sp, 8
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.d $a0, $sp, 0
 ; CHECK-NEXT:    st.d $zero, $sp, 56
-; CHECK-NEXT:    st.d $zero, $sp, 48
-; CHECK-NEXT:    st.d $zero, $sp, 40
+; CHECK-NEXT:    vst $vr0, $sp, 40
 ; CHECK-NEXT:    ori $a2, $zero, 1
 ; CHECK-NEXT:    addi.d $a0, $sp, 32
 ; CHECK-NEXT:    addi.d $a1, $sp, 0
@@ -182,14 +181,13 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 ; CHECK-NEXT:    ori $a0, $zero, 9
 ; CHECK-NEXT:    st.d $a0, $sp, 0
 ; CHECK-NEXT:    st.d $zero, $sp, 40
-; CHECK-NEXT:    st.d $zero, $sp, 32
-; CHECK-NEXT:    st.d $zero, $sp, 24
+; CHECK-NEXT:    vrepli.b $vr0, 0
+; CHECK-NEXT:    vst $vr0, $sp, 24
 ; CHECK-NEXT:    ori $a0, $zero, 10
 ; CHECK-NEXT:    st.d $a0, $sp, 16
 ; CHECK-NEXT:    st.d $zero, $sp, 72
-; CHECK-NEXT:    st.d $zero, $sp, 64
-; CHECK-NEXT:    st.d $zero, $sp, 56
-; CHECK-NEXT:    ori $t0, $zero, 8
+; CHECK-NEXT:    ori $a0, $zero, 8
+; CHECK-NEXT:    st.d $a0, $sp, 48
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    ori $a1, $zero, 2
 ; CHECK-NEXT:    ori $a2, $zero, 3
@@ -198,7 +196,7 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 ; CHECK-NEXT:    ori $a5, $zero, 6
 ; CHECK-NEXT:    ori $a6, $zero, 7
 ; CHECK-NEXT:    addi.d $a7, $sp, 48
-; CHECK-NEXT:    st.d $t0, $sp, 48
+; CHECK-NEXT:    vst $vr0, $sp, 56
 ; CHECK-NEXT:    bl %plt(callee_large_scalars_exhausted_regs)
 ; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 96
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
index 34fbec03c535b0..35186b660c1e66 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
@@ -63,26 +63,17 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT:    fld.d $fa1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; CHECK-NEXT:    fld.d $fa2, $a0, %pc_lo12(.LCPI3_1)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; CHECK-NEXT:    fld.d $fa3, $a0, %pc_lo12(.LCPI3_2)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; CHECK-NEXT:    fld.d $fa4, $a0, %pc_lo12(.LCPI3_3)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; CHECK-NEXT:    fld.d $fa5, $a0, %pc_lo12(.LCPI3_4)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; CHECK-NEXT:    fld.d $fa6, $a0, %pc_lo12(.LCPI3_5)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; CHECK-NEXT:    fld.d $fa7, $a0, %pc_lo12(.LCPI3_6)
-; CHECK-NEXT:    addi.d $a0, $zero, 1
-; CHECK-NEXT:    movgr2fr.d $fa0, $a0
-; CHECK-NEXT:    ffint.d.l $fa0, $fa0
 ; CHECK-NEXT:    ori $a0, $zero, 0
 ; CHECK-NEXT:    lu32i.d $a0, 131072
 ; CHECK-NEXT:    lu52i.d $a0, $a0, 1026
+; CHECK-NEXT:    vldi $vr0, -912
+; CHECK-NEXT:    vldi $vr1, -1024
+; CHECK-NEXT:    vldi $vr2, -1016
+; CHECK-NEXT:    vldi $vr3, -1008
+; CHECK-NEXT:    vldi $vr4, -1004
+; CHECK-NEXT:    vldi $vr5, -1000
+; CHECK-NEXT:    vldi $vr6, -996
+; CHECK-NEXT:    vldi $vr7, -992
 ; CHECK-NEXT:    bl %plt(callee_double_in_gpr_exhausted_fprs)
 ; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 16
@@ -98,9 +89,7 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind {
 define double @callee_double_ret() nounwind {
 ; CHECK-LABEL: callee_double_ret:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $a0, $zero, 1
-; CHECK-NEXT:    movgr2fr.d $fa0, $a0
-; CHECK-NEXT:    ffint.d.l $fa0, $fa0
+; CHECK-NEXT:    vldi $vr0, -912
 ; CHECK-NEXT:    ret
   ret double 1.0
 }
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
index 558b9457239c13..79e66c8a6a1e38 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
@@ -6,16 +6,10 @@
 define i64 @callee_float_in_regs(i64 %a, float %b) nounwind {
 ; CHECK-LABEL: callee_float_in_regs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
-; CHECK-NEXT:    move $fp, $a0
-; CHECK-NEXT:    move $a0, $a1
-; CHECK-NEXT:    bl %plt(__fixsfdi)
-; CHECK-NEXT:    add.d $a0, $fp, $a0
-; CHECK-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
-; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    movgr2fr.w $fa0, $a1
+; CHECK-NEXT:    ftintrz.l.s $fa0, $fa0
+; CHECK-NEXT:    movfr2gr.d $a1, $fa0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ret
   %b_fptosi = fptosi float %b to i64
   %1 = add i64 %a, %b_fptosi
@@ -27,7 +21,8 @@ define i64 @caller_float_in_regs() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    lu12i.w $a1, 262144
+; CHECK-NEXT:    vldi $vr0, -1280
+; CHECK-NEXT:    movfr2gr.s $a1, $fa0
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    bl %plt(callee_float_in_regs)
 ; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
@@ -59,7 +54,7 @@ define i64 @caller_float_on_stack() nounwind {
 ; CHECK-NEXT:    ori $a2, $zero, 2
 ; CHECK-NEXT:    ori $a4, $zero, 3
 ; CHECK-NEXT:    ori $a6, $zero, 4
-; CHECK-NEXT:    st.d $a1, $sp, 0
+; CHECK-NEXT:    st.w $a1, $sp, 0
 ; CHECK-NEXT:    move $a1, $zero
 ; CHECK-NEXT:    move $a3, $zero
 ; CHECK-NEXT:    move $a5, $zero
@@ -75,7 +70,8 @@ define i64 @caller_float_on_stack() nounwind {
 define float @callee_tiny_scalar_ret() nounwind {
 ; CHECK-LABEL: callee_tiny_scalar_ret:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lu12i.w $a0, 260096
+; CHECK-NEXT:    vldi $vr0, -1168
+; CHECK-NEXT:    movfr2gr.s $a0, $fa0
 ; CHECK-NEXT:    ret
   ret float 1.0
 }
@@ -86,7 +82,8 @@ define i64 @caller_tiny_scalar_ret() nounwind {
 ; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; CHECK-NEXT:    bl %plt(callee_tiny_scalar_ret)
-; CHECK-NEXT:    addi.w $a0, $a0, 0
+; CHECK-NEXT:    movgr2fr.w $fa0, $a0
+; CHECK-NEXT:    movfr2gr.s $a0, $fa0
 ; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index a26102710cbebe..161ed573c81f02 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -175,16 +175,11 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    andi $a1, $a1, 85
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    andi $a1, $a0, 51
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    andi $a0, $a0, 51
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    andi $a0, $a0, 15
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i8 @llvm.ctpop.i8(i8 %a)
   ret i8 %1
@@ -213,22 +208,11 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 5
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 3
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    bstrpick.d $a1, $a0, 11, 8
-; LA64-NEXT:    andi $a0, $a0, 15
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i16 @llvm.ctpop.i16(i16 %a)
   ret i16 %1
@@ -261,26 +245,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 349525
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 209715
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 61680
-; LA64-NEXT:    ori $a1, $a1, 3855
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 4112
-; LA64-NEXT:    ori $a1, $a1, 257
-; LA64-NEXT:    mul.d $a0, $a0, $a1
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %1
@@ -327,30 +296,10 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 349525
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    bstrins.d $a2, $a2, 62, 32
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 209715
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    bstrins.d $a1, $a1, 61, 32
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 61680
-; LA64-NEXT:    ori $a1, $a1, 3855
-; LA64-NEXT:    bstrins.d $a1, $a1, 59, 32
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 4112
-; LA64-NEXT:    ori $a1, $a1, 257
-; LA64-NEXT:    bstrins.d $a1, $a1, 56, 32
-; LA64-NEXT:    mul.d $a0, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 56
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %1
diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll
index 8d50b27907d72b..fe403ec532d8e8 100644
--- a/llvm/test/CodeGen/LoongArch/double-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/double-imm.ll
@@ -59,9 +59,7 @@ define double @f64_add_fimm1(double %a) nounwind {
 ;
 ; LA64-LABEL: f64_add_fimm1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $a0, $zero, 1
-; LA64-NEXT:    movgr2fr.d $fa1, $a0
-; LA64-NEXT:    ffint.d.l $fa1, $fa1
+; LA64-NEXT:    vldi $vr1, -912
 ; LA64-NEXT:    fadd.d $fa0, $fa0, $fa1
 ; LA64-NEXT:    ret
   %1 = fadd double %a, 1.0
@@ -79,9 +77,7 @@ define double @f64_positive_fimm1() nounwind {
 ;
 ; LA64-LABEL: f64_positive_fimm1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $a0, $zero, 1
-; LA64-NEXT:    movgr2fr.d $fa0, $a0
-; LA64-NEXT:    ffint.d.l $fa0, $fa0
+; LA64-NEXT:    vldi $vr0, -912
 ; LA64-NEXT:    ret
   ret double 1.0
 }
diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll
index ac5cb3c7e72115..cf15fd8bdb4372 100644
--- a/llvm/test/CodeGen/LoongArch/frame.ll
+++ b/llvm/test/CodeGen/LoongArch/frame.ll
@@ -12,8 +12,8 @@ define i32 @test() nounwind {
 ; CHECK-NEXT:    addi.d $sp, $sp, -32
 ; CHECK-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; CHECK-NEXT:    st.w $zero, $sp, 16
-; CHECK-NEXT:    st.d $zero, $sp, 8
-; CHECK-NEXT:    st.d $zero, $sp, 0
+; CHECK-NEXT:    vrepli.b $vr0, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
 ; CHECK-NEXT:    addi.d $a0, $sp, 4
 ; CHECK-NEXT:    bl %plt(test1)
 ; CHECK-NEXT:    move $a0, $zero
diff --git a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
index 6cf9d7d75b9963..3d6e22b5eeb102 100644
--- a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
+++ b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
@@ -5,22 +5,9 @@
 define void @getSetCCResultType(ptr %p) {
 ; CHECK-LABEL: getSetCCResultType:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.w $a1, $a0, 0
-; CHECK-NEXT:    ld.w $a2, $a0, 12
-; CHECK-NEXT:    ld.w $a3, $a0, 4
-; CHECK-NEXT:    ld.w $a4, $a0, 8
-; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    sub.d $a1, $zero, $a1
-; CHECK-NEXT:    sltui $a3, $a3, 1
-; CHECK-NEXT:    sub.d $a3, $zero, $a3
-; CHECK-NEXT:    sltui $a4, $a4, 1
-; CHECK-NEXT:    sub.d $a4, $zero, $a4
-; CHECK-NEXT:    sltui $a2, $a2, 1
-; CHECK-NEXT:    sub.d $a2, $zero, $a2
-; CHECK-NEXT:    st.w $a2, $a0, 12
-; CHECK-NEXT:    st.w $a4, $a0, 8
-; CHECK-NEXT:    st.w $a3, $a0, 4
-; CHECK-NEXT:    st.w $a1, $a0, 0
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vseqi.w $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <4 x i32>, ptr %p, align 16
diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll
index 570fd438be97bf..83f796f73934c9 100644
--- a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll
+++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s
+; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LA32
 ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s
 
 define void @constraint_l() {
@@ -32,9 +32,9 @@ define void @constraint_K() {
 }
 
 define void @constraint_f() nounwind {
-; CHECK: error: couldn't allocate input reg for constraint 'f'
+; LA32: error: couldn't allocate input reg for constraint 'f'
   tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(float 0.0)
-; CHECK: error: couldn't allocate input reg for constraint 'f'
+; LA32: error: couldn't allocate input reg for constraint 'f'
   tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(double 0.0)
   ret void
 }
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll
index a839ab149c3338..176e3f60c56252 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s
+; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LA32
 ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s
 
 declare void @llvm.loongarch.dbar(i32)
@@ -54,7 +54,7 @@ entry:
 }
 
 define void @movgr2fcsr(i32 %a) nounwind {
-; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature.
+; LA32: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature.
 entry:
   call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a)
   ret void
@@ -75,7 +75,7 @@ entry:
 }
 
 define i32 @movfcsr2gr() nounwind {
-; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature.
+; LA32: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature.
 entry:
   %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
index 622001db329551..402ddb9ad941b4 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
@@ -12,18 +12,12 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der
 ; CHECK-NEXT:    alsl.d $a1, $a1, $a2, 4
 ; CHECK-NEXT:    addi.d $a2, $sp, 0
 ; CHECK-NEXT:    add.d $a3, $a2, $a1
-; CHECK-NEXT:    ldx.d $a1, $a1, $a2
-; CHECK-NEXT:    ld.d $a2, $a3, 40
-; CHECK-NEXT:    st.d $a1, $a0, 0
-; CHECK-NEXT:    st.d $a2, $a0, 40
-; CHECK-NEXT:    ld.d $a1, $a3, 32
-; CHECK-NEXT:    ld.d $a2, $a3, 24
-; CHECK-NEXT:    ld.d $a4, $a3, 16
-; CHECK-NEXT:    ld.d $a3, $a3, 8
-; CHECK-NEXT:    st.d $a1, $a0, 32
-; CHECK-NEXT:    st.d $a2, $a0, 24
-; CHECK-NEXT:    st.d $a4, $a0, 16
-; CHECK-NEXT:    st.d $a3, $a0, 8
+; CHECK-NEXT:    vldx $vr0, $a1, $a2
+; CHECK-NEXT:    vld $vr1, $a3, 32
+; CHECK-NEXT:    vld $vr2, $a3, 16
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    vst $vr1, $a0, 32
+; CHECK-NEXT:    vst $vr2, $a0, 16
 ; CHECK-NEXT:    addi.d $sp, $sp, 96
 ; CHECK-NEXT:    ret
   %1 = alloca [2 x %Box], align 16
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 7e320d9245f1c2..6ea658acdd7172 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -40,9 +40,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
 ; LA64D-LABEL: float_fadd_acquire:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 0
-; LA64D-NEXT:    addi.w $a1, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa1, $a1
-; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    vldi $vr1, -1168
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB0_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Loop Header: Depth=1
@@ -111,8 +109,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
 ; LA64D-LABEL: float_fsub_acquire:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 0
-; LA64D-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; LA64D-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI1_0)
+; LA64D-NEXT:    vldi $vr1, -1040
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB1_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Loop Header: Depth=1
@@ -183,9 +180,7 @@ define float @float_fmin_acquire(ptr %p) nounwind {
 ; LA64D-LABEL: float_fmin_acquire:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 0
-; LA64D-NEXT:    addi.w $a1, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa1, $a1
-; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    vldi $vr1, -1168
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB2_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Loop Header: Depth=1
@@ -257,9 +252,7 @@ define float @float_fmax_acquire(ptr %p) nounwind {
 ; LA64D-LABEL: float_fmax_acquire:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 0
-; LA64D-NEXT:    addi.w $a1, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa1, $a1
-; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    vldi $vr1, -1168
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB3_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Loo...
[truncated]

@heiher
Member

heiher commented Oct 23, 2024

cc @xen0n

@xen0n xen0n (Contributor) left a comment

It's nice to see vectorization being leveraged across the board, as per the recommended baseline!

Note to myself: While this clearly affects -march=generic for Clang, I haven't verified whether -march=loongarch64 behavior stays the same as GCC (no LSX).

2nd note to myself: GCC needs -march=generic implemented too for feature parity.

Comment on lines 9 to 12
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
-; CHECK-NEXT:    move $fp, $a0
-; CHECK-NEXT:    move $a0, $a1
-; CHECK-NEXT:    bl %plt(__fixsfdi)
-; CHECK-NEXT:    add.d $a0, $fp, $a0
-; CHECK-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
-; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    movgr2fr.w $fa0, $a1
+; CHECK-NEXT:    ftintrz.l.s $fa0, $fa0
+; CHECK-NEXT:    movfr2gr.d $a1, $fa0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
Contributor

The LP64S tests require updating the compilation flags so they stay soft-float.

Contributor Author

Updated.
Added --mattr=-f to stay soft-float.
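A sketch of what such a RUN line could look like (an illustration of mine, using the flag from the comment above; not necessarily the exact lines that landed):

; RUN: llc --mtriple=loongarch64 --mattr=-f --target-abi=lp64s < %s \
; RUN:   | FileCheck %s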

; RUN: llc --mtriple=loongarch64 --verify-machineinstrs --stop-after=prologepilog < %s | FileCheck %s

;; Check that STATEPOINT instruction has an early clobber implicit def for R1.

define void @test() gc "statepoint-example" {
entry:
%safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" ()]
; CHECK: STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32s_lp64s, implicit-def $r3, implicit-def dead early-clobber $r1
Contributor

Is this behavioral change intentional?

Contributor Author

The automated update of this case with update_llc_test_checks.py led to this behavior.
To keep the original style for easier review, I updated it manually this time.

@@ -2,7 +2,7 @@
 ; RUN: opt -passes=loop-idiom -mtriple=loongarch32 -mattr=+lsx -S < %s | FileCheck %s --check-prefix=CPOP
 ; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -mattr=+lsx -S < %s | FileCheck %s --check-prefix=CPOP
 ; RUN: opt -passes=loop-idiom -mtriple=loongarch32 -S < %s | FileCheck %s --check-prefix=NOCPOP
-; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -S < %s | FileCheck %s --check-prefix=NOCPOP
+; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -S < %s | FileCheck %s --check-prefix=CPOP
Contributor

Can we additionally test LA64 without LSX somehow, to ensure that coverage doesn't shrink?

Contributor Author

Updated; I adjusted how this is tested.
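One way to preserve that coverage, as a sketch (an assumed RUN line of mine, not necessarily the one that landed), is to explicitly disable LSX on LA64:

; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -mattr=-lsx -S < %s | FileCheck %s --check-prefix=NOCPOP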

@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
Contributor

The check prefixes for this test were written manually. I prefer keeping this style unchanged this time for easier review. Maybe you can then submit a separate NFC PR that switches it over to update_llc_test_checks.py.

Contributor Author

Done.

@@ -1,13 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
Contributor

Same as llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll

Contributor Author

Done.

@Ami-zhang
Contributor Author

Note to myself: While this clearly affects -march=generic for Clang, I haven't verified whether -march=loongarch64 behavior stays the same as GCC (no LSX).

2nd note to myself: GCC needs -march=generic implemented too for feature parity.

Thanks for your thoughtfulness and for sharing your insights! Of course, we will also continue to keep an eye on matters related to -march=generic.

@xen0n
Contributor

xen0n commented Oct 24, 2024

Sorry, I misremembered things; there's actually no -march=generic, even in x86 land. We currently only have loongarch64, la464, and la664 for -march, and it's okay as-is.

@Ami-zhang Ami-zhang requested a review from xen0n October 24, 2024 12:45
@heiher
Member

heiher commented Oct 24, 2024

This patch enables LSX by default in Clang, whereas GCC does not. Should we align the behavior with GCC?

This commit makes the `generic` target support FP and LSX, as
discussed in llvm#110211. This allows 128-bit vectors to be enabled
by default in the loongarch64 backend.
@SixWeining
Contributor

This patch enables LSX by default in Clang, whereas GCC does not. Should we align the behavior with GCC?

Actually it has been enabled in #100056.

Yes, Clang and GCC have different mechanisms for enabling LSX by default. Clang achieves this in how its source code is written, while GCC achieves it through how it is configured when being built.

AFAIK, the upcoming soft-dev-conv requires that developers enable LSX by default when building desktop and server operating systems (but not embedded systems). Maybe maintainers of Linux distributions will turn on LSX when they configure GCC.

@xen0n xen0n (Contributor) left a comment

AFAIK, the upcoming soft-dev-conv requires that developers enable LSX by default when building desktop and server operating systems (but not embedded systems).

Upcoming? Indeed it seems so because v0.1 says "should".

This matters mostly from a business perspective, more so than the technical (performance) perspective, because one of the indirect consequences of mandating LSX (without also mandating that scalar fallbacks be provided for all code) is that a body of closed-source LSX-containing components will begin to form, forcing the ecosystem built upon it to require LSX as well. This allows for easy market segmentation, e.g. by providing specific CPU models without LSX/LASX -- while lower-priced overall, they cannot run most general-purpose distributions unmodified, and even if the devs went to the trouble of assembling a sysroot themselves, e.g. with buildroot, they'd still be out of luck if their business depends on a closed-source blob requiring LSX. And with certain LSX/LASX features likely patent-encumbered, this makes third-party LoongArch implementations less competitive as well.

However, the status quo is already LSX all over the place -- memcpy/memset getting auto-vectorized once it's enabled by default -- as one can verify on Compiler Explorer or on their own machine. In this case having consistency is probably better, and we should approach the embedded/indie-core problem separately.

As I have now verified that several important -march choices match GCC's behavior for a trivial loop eligible for vectorization (simple i32 adds; a stand-in sketch follows below):

  • no -march given: LSX
  • -march=loongarch64: scalar
  • -march=la64v1.0: LSX

I'm accepting the patch as well. Thanks!
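For reference, a minimal LLVM IR stand-in for that kind of loop (my own sketch, not the exact source used above; assumes %n > 0). Fed through the loop vectorizer, e.g. opt -passes=loop-vectorize -mtriple=loongarch64 -S, it should now be widened to <4 x i32> operations by default, while adding -mcpu=loongarch64 should keep it scalar.

define void @add_i32(ptr noalias %a, ptr noalias %b, i64 %n) {
entry:
  br label %loop
loop:                                         ; simple i32 adds, eligible for LSX
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %pa = getelementptr inbounds i32, ptr %a, i64 %i
  %pb = getelementptr inbounds i32, ptr %b, i64 %i
  %va = load i32, ptr %pa, align 4
  %vb = load i32, ptr %pb, align 4
  %sum = add i32 %va, %vb
  store i32 %sum, ptr %pa, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}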

@Ami-zhang Ami-zhang merged commit 1897bf6 into llvm:main Oct 31, 2024
8 checks passed
@Ami-zhang Ami-zhang deleted the generic branch October 31, 2024 07:59
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024

NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024