Skip to content

Commit f06d969

Browse files
[LLVM][DAGCombiner] Extend coverage for insert_subv(undef, extract_subv(A, 0), 0) (#95242)
There is an existing combine to remove the need for extract_subv that requires matching vector types (all fixed or all scalable). The combine doesn't need this restriction, so I've changed it to use ValueType's knownBitsGE/knownBitsLE interface, which supports mixed vector types. In doing so we also need extra guards to prevent invalid operations (e.g. extracting a scalable vector from a fixed length vector).
1 parent 1ba8ed0 commit f06d969

8 files changed

+846
-564
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26473,12 +26473,13 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2647326473
return N1.getOperand(0);
2647426474
// TODO: To remove the zero check, need to adjust the offset to
2647526475
// a multiple of the new src type.
26476-
if (isNullConstant(N2) &&
26477-
VT.isScalableVector() == SrcVT.isScalableVector()) {
26478-
if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26476+
if (isNullConstant(N2)) {
26477+
if (VT.knownBitsGE(SrcVT) &&
26478+
!(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
2647926479
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
2648026480
VT, N0, N1.getOperand(0), N2);
26481-
else
26481+
else if (VT.knownBitsLE(SrcVT) &&
26482+
!(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
2648226483
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
2648326484
VT, N1.getOperand(0), N2);
2648426485
}

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll

Lines changed: 128 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -519,32 +519,32 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
519519
define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
520520
; CHECK-LABEL: fcvtzu_v8f16_v8i64:
521521
; CHECK: // %bb.0:
522-
; CHECK-NEXT: sub sp, sp, #64
523-
; CHECK-NEXT: .cfi_def_cfa_offset 64
524522
; CHECK-NEXT: ldr q0, [x0]
525-
; CHECK-NEXT: mov z1.h, z0.h[1]
526-
; CHECK-NEXT: mov z2.h, z0.h[3]
527-
; CHECK-NEXT: mov z3.h, z0.h[2]
528-
; CHECK-NEXT: fcvtzu x8, h0
529-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
530-
; CHECK-NEXT: fcvtzu x9, h1
531-
; CHECK-NEXT: fcvtzu x10, h2
532-
; CHECK-NEXT: fcvtzu x11, h3
533-
; CHECK-NEXT: mov z1.h, z0.h[1]
534-
; CHECK-NEXT: mov z2.h, z0.h[3]
523+
; CHECK-NEXT: mov z1.d, z0.d
535524
; CHECK-NEXT: fcvtzu x12, h0
536-
; CHECK-NEXT: mov z0.h, z0.h[2]
537-
; CHECK-NEXT: stp x8, x9, [sp, #32]
525+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
526+
; CHECK-NEXT: mov z2.h, z1.h[1]
538527
; CHECK-NEXT: fcvtzu x8, h1
528+
; CHECK-NEXT: mov z3.h, z1.h[3]
529+
; CHECK-NEXT: mov z1.h, z1.h[2]
539530
; CHECK-NEXT: fcvtzu x9, h2
540-
; CHECK-NEXT: stp x11, x10, [sp, #48]
531+
; CHECK-NEXT: mov z2.h, z0.h[1]
532+
; CHECK-NEXT: fcvtzu x10, h3
533+
; CHECK-NEXT: mov z3.h, z0.h[3]
534+
; CHECK-NEXT: fcvtzu x11, h1
535+
; CHECK-NEXT: mov z0.h, z0.h[2]
536+
; CHECK-NEXT: stp x8, x9, [sp, #-64]!
537+
; CHECK-NEXT: .cfi_def_cfa_offset 64
538+
; CHECK-NEXT: fcvtzu x8, h2
539+
; CHECK-NEXT: fcvtzu x9, h3
540+
; CHECK-NEXT: stp x11, x10, [sp, #16]
541541
; CHECK-NEXT: fcvtzu x10, h0
542-
; CHECK-NEXT: ldp q2, q3, [sp, #32]
543-
; CHECK-NEXT: stp x12, x8, [sp]
544-
; CHECK-NEXT: stp x10, x9, [sp, #16]
545-
; CHECK-NEXT: ldp q1, q0, [sp]
546-
; CHECK-NEXT: stp q2, q3, [x1]
547-
; CHECK-NEXT: stp q1, q0, [x1, #32]
542+
; CHECK-NEXT: ldp q2, q3, [sp]
543+
; CHECK-NEXT: stp x12, x8, [sp, #32]
544+
; CHECK-NEXT: stp x10, x9, [sp, #48]
545+
; CHECK-NEXT: ldp q1, q0, [sp, #32]
546+
; CHECK-NEXT: stp q2, q3, [x1, #32]
547+
; CHECK-NEXT: stp q1, q0, [x1]
548548
; CHECK-NEXT: add sp, sp, #64
549549
; CHECK-NEXT: ret
550550
;
@@ -598,55 +598,56 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
598598
define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
599599
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
600600
; CHECK: // %bb.0:
601-
; CHECK-NEXT: sub sp, sp, #128
602-
; CHECK-NEXT: .cfi_def_cfa_offset 128
603601
; CHECK-NEXT: ldp q1, q0, [x0]
604-
; CHECK-NEXT: mov z2.h, z1.h[1]
605-
; CHECK-NEXT: mov z3.h, z1.h[3]
606-
; CHECK-NEXT: mov z4.h, z1.h[2]
607-
; CHECK-NEXT: fcvtzu x8, h1
608-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
609-
; CHECK-NEXT: mov z5.h, z0.h[3]
610-
; CHECK-NEXT: fcvtzu x10, h0
611-
; CHECK-NEXT: fcvtzu x9, h2
612-
; CHECK-NEXT: fcvtzu x11, h3
613-
; CHECK-NEXT: fcvtzu x12, h4
614-
; CHECK-NEXT: mov z2.h, z1.h[1]
615-
; CHECK-NEXT: mov z4.h, z1.h[3]
616-
; CHECK-NEXT: fcvtzu x13, h1
617-
; CHECK-NEXT: mov z1.h, z1.h[2]
618-
; CHECK-NEXT: mov z3.h, z0.h[1]
619-
; CHECK-NEXT: stp x8, x9, [sp, #32]
602+
; CHECK-NEXT: mov z2.d, z1.d
603+
; CHECK-NEXT: mov z3.d, z0.d
604+
; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
605+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
606+
; CHECK-NEXT: mov z4.h, z2.h[1]
620607
; CHECK-NEXT: fcvtzu x8, h2
608+
; CHECK-NEXT: mov z5.h, z2.h[3]
609+
; CHECK-NEXT: mov z2.h, z2.h[2]
610+
; CHECK-NEXT: fcvtzu x12, h3
621611
; CHECK-NEXT: fcvtzu x9, h4
622-
; CHECK-NEXT: stp x12, x11, [sp, #48]
612+
; CHECK-NEXT: mov z4.h, z3.h[1]
613+
; CHECK-NEXT: fcvtzu x10, h5
614+
; CHECK-NEXT: mov z5.h, z3.h[3]
615+
; CHECK-NEXT: fcvtzu x11, h2
616+
; CHECK-NEXT: mov z2.h, z3.h[2]
617+
; CHECK-NEXT: stp x8, x9, [sp, #-128]!
618+
; CHECK-NEXT: .cfi_def_cfa_offset 128
619+
; CHECK-NEXT: fcvtzu x8, h4
620+
; CHECK-NEXT: fcvtzu x9, h5
621+
; CHECK-NEXT: stp x11, x10, [sp, #16]
622+
; CHECK-NEXT: fcvtzu x10, h2
623+
; CHECK-NEXT: mov z3.h, z1.h[1]
624+
; CHECK-NEXT: mov z4.h, z1.h[3]
623625
; CHECK-NEXT: fcvtzu x11, h1
624-
; CHECK-NEXT: mov z2.h, z0.h[2]
625-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
626+
; CHECK-NEXT: mov z1.h, z1.h[2]
627+
; CHECK-NEXT: mov z2.h, z0.h[1]
628+
; CHECK-NEXT: stp x12, x8, [sp, #64]
626629
; CHECK-NEXT: fcvtzu x12, h3
627-
; CHECK-NEXT: stp x13, x8, [sp]
628-
; CHECK-NEXT: fcvtzu x8, h5
629-
; CHECK-NEXT: stp x11, x9, [sp, #16]
630-
; CHECK-NEXT: fcvtzu x9, h2
631-
; CHECK-NEXT: mov z1.h, z0.h[1]
632-
; CHECK-NEXT: mov z2.h, z0.h[3]
633-
; CHECK-NEXT: fcvtzu x11, h0
630+
; CHECK-NEXT: fcvtzu x8, h4
631+
; CHECK-NEXT: stp x10, x9, [sp, #80]
632+
; CHECK-NEXT: fcvtzu x9, h1
633+
; CHECK-NEXT: mov z3.h, z0.h[3]
634+
; CHECK-NEXT: fcvtzu x10, h0
634635
; CHECK-NEXT: mov z0.h, z0.h[2]
635-
; CHECK-NEXT: stp x10, x12, [sp, #96]
636-
; CHECK-NEXT: ldp q3, q4, [sp]
637-
; CHECK-NEXT: fcvtzu x10, h1
638-
; CHECK-NEXT: fcvtzu x12, h2
639-
; CHECK-NEXT: stp x9, x8, [sp, #112]
636+
; CHECK-NEXT: stp x11, x12, [sp, #32]
637+
; CHECK-NEXT: fcvtzu x11, h2
638+
; CHECK-NEXT: fcvtzu x12, h3
639+
; CHECK-NEXT: stp x9, x8, [sp, #48]
640640
; CHECK-NEXT: fcvtzu x8, h0
641-
; CHECK-NEXT: ldp q0, q1, [sp, #32]
642-
; CHECK-NEXT: ldp q6, q7, [sp, #96]
643-
; CHECK-NEXT: stp x11, x10, [sp, #64]
644-
; CHECK-NEXT: stp x8, x12, [sp, #80]
645-
; CHECK-NEXT: ldp q5, q2, [sp, #64]
646-
; CHECK-NEXT: stp q0, q1, [x1]
647-
; CHECK-NEXT: stp q3, q4, [x1, #32]
648-
; CHECK-NEXT: stp q6, q7, [x1, #64]
649-
; CHECK-NEXT: stp q5, q2, [x1, #96]
641+
; CHECK-NEXT: ldp q0, q1, [sp]
642+
; CHECK-NEXT: ldp q3, q4, [sp, #64]
643+
; CHECK-NEXT: stp x10, x11, [sp, #96]
644+
; CHECK-NEXT: ldp q6, q7, [sp, #32]
645+
; CHECK-NEXT: stp x8, x12, [sp, #112]
646+
; CHECK-NEXT: ldp q5, q2, [sp, #96]
647+
; CHECK-NEXT: stp q0, q1, [x1, #32]
648+
; CHECK-NEXT: stp q6, q7, [x1]
649+
; CHECK-NEXT: stp q3, q4, [x1, #96]
650+
; CHECK-NEXT: stp q5, q2, [x1, #64]
650651
; CHECK-NEXT: add sp, sp, #128
651652
; CHECK-NEXT: ret
652653
;
@@ -2262,32 +2263,32 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
22622263
define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
22632264
; CHECK-LABEL: fcvtzs_v8f16_v8i64:
22642265
; CHECK: // %bb.0:
2265-
; CHECK-NEXT: sub sp, sp, #64
2266-
; CHECK-NEXT: .cfi_def_cfa_offset 64
22672266
; CHECK-NEXT: ldr q0, [x0]
2268-
; CHECK-NEXT: mov z1.h, z0.h[1]
2269-
; CHECK-NEXT: mov z2.h, z0.h[3]
2270-
; CHECK-NEXT: mov z3.h, z0.h[2]
2271-
; CHECK-NEXT: fcvtzs x8, h0
2272-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
2273-
; CHECK-NEXT: fcvtzs x9, h1
2274-
; CHECK-NEXT: fcvtzs x10, h2
2275-
; CHECK-NEXT: fcvtzs x11, h3
2276-
; CHECK-NEXT: mov z1.h, z0.h[1]
2277-
; CHECK-NEXT: mov z2.h, z0.h[3]
2267+
; CHECK-NEXT: mov z1.d, z0.d
22782268
; CHECK-NEXT: fcvtzs x12, h0
2279-
; CHECK-NEXT: mov z0.h, z0.h[2]
2280-
; CHECK-NEXT: stp x8, x9, [sp, #32]
2269+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
2270+
; CHECK-NEXT: mov z2.h, z1.h[1]
22812271
; CHECK-NEXT: fcvtzs x8, h1
2272+
; CHECK-NEXT: mov z3.h, z1.h[3]
2273+
; CHECK-NEXT: mov z1.h, z1.h[2]
22822274
; CHECK-NEXT: fcvtzs x9, h2
2283-
; CHECK-NEXT: stp x11, x10, [sp, #48]
2275+
; CHECK-NEXT: mov z2.h, z0.h[1]
2276+
; CHECK-NEXT: fcvtzs x10, h3
2277+
; CHECK-NEXT: mov z3.h, z0.h[3]
2278+
; CHECK-NEXT: fcvtzs x11, h1
2279+
; CHECK-NEXT: mov z0.h, z0.h[2]
2280+
; CHECK-NEXT: stp x8, x9, [sp, #-64]!
2281+
; CHECK-NEXT: .cfi_def_cfa_offset 64
2282+
; CHECK-NEXT: fcvtzs x8, h2
2283+
; CHECK-NEXT: fcvtzs x9, h3
2284+
; CHECK-NEXT: stp x11, x10, [sp, #16]
22842285
; CHECK-NEXT: fcvtzs x10, h0
2285-
; CHECK-NEXT: ldp q2, q3, [sp, #32]
2286-
; CHECK-NEXT: stp x12, x8, [sp]
2287-
; CHECK-NEXT: stp x10, x9, [sp, #16]
2288-
; CHECK-NEXT: ldp q1, q0, [sp]
2289-
; CHECK-NEXT: stp q2, q3, [x1]
2290-
; CHECK-NEXT: stp q1, q0, [x1, #32]
2286+
; CHECK-NEXT: ldp q2, q3, [sp]
2287+
; CHECK-NEXT: stp x12, x8, [sp, #32]
2288+
; CHECK-NEXT: stp x10, x9, [sp, #48]
2289+
; CHECK-NEXT: ldp q1, q0, [sp, #32]
2290+
; CHECK-NEXT: stp q2, q3, [x1, #32]
2291+
; CHECK-NEXT: stp q1, q0, [x1]
22912292
; CHECK-NEXT: add sp, sp, #64
22922293
; CHECK-NEXT: ret
22932294
;
@@ -2341,55 +2342,56 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
23412342
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
23422343
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
23432344
; CHECK: // %bb.0:
2344-
; CHECK-NEXT: sub sp, sp, #128
2345-
; CHECK-NEXT: .cfi_def_cfa_offset 128
23462345
; CHECK-NEXT: ldp q1, q0, [x0]
2347-
; CHECK-NEXT: mov z2.h, z1.h[1]
2348-
; CHECK-NEXT: mov z3.h, z1.h[3]
2349-
; CHECK-NEXT: mov z4.h, z1.h[2]
2350-
; CHECK-NEXT: fcvtzs x8, h1
2351-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
2352-
; CHECK-NEXT: mov z5.h, z0.h[3]
2353-
; CHECK-NEXT: fcvtzs x10, h0
2354-
; CHECK-NEXT: fcvtzs x9, h2
2355-
; CHECK-NEXT: fcvtzs x11, h3
2356-
; CHECK-NEXT: fcvtzs x12, h4
2357-
; CHECK-NEXT: mov z2.h, z1.h[1]
2358-
; CHECK-NEXT: mov z4.h, z1.h[3]
2359-
; CHECK-NEXT: fcvtzs x13, h1
2360-
; CHECK-NEXT: mov z1.h, z1.h[2]
2361-
; CHECK-NEXT: mov z3.h, z0.h[1]
2362-
; CHECK-NEXT: stp x8, x9, [sp, #32]
2346+
; CHECK-NEXT: mov z2.d, z1.d
2347+
; CHECK-NEXT: mov z3.d, z0.d
2348+
; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
2349+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
2350+
; CHECK-NEXT: mov z4.h, z2.h[1]
23632351
; CHECK-NEXT: fcvtzs x8, h2
2352+
; CHECK-NEXT: mov z5.h, z2.h[3]
2353+
; CHECK-NEXT: mov z2.h, z2.h[2]
2354+
; CHECK-NEXT: fcvtzs x12, h3
23642355
; CHECK-NEXT: fcvtzs x9, h4
2365-
; CHECK-NEXT: stp x12, x11, [sp, #48]
2356+
; CHECK-NEXT: mov z4.h, z3.h[1]
2357+
; CHECK-NEXT: fcvtzs x10, h5
2358+
; CHECK-NEXT: mov z5.h, z3.h[3]
2359+
; CHECK-NEXT: fcvtzs x11, h2
2360+
; CHECK-NEXT: mov z2.h, z3.h[2]
2361+
; CHECK-NEXT: stp x8, x9, [sp, #-128]!
2362+
; CHECK-NEXT: .cfi_def_cfa_offset 128
2363+
; CHECK-NEXT: fcvtzs x8, h4
2364+
; CHECK-NEXT: fcvtzs x9, h5
2365+
; CHECK-NEXT: stp x11, x10, [sp, #16]
2366+
; CHECK-NEXT: fcvtzs x10, h2
2367+
; CHECK-NEXT: mov z3.h, z1.h[1]
2368+
; CHECK-NEXT: mov z4.h, z1.h[3]
23662369
; CHECK-NEXT: fcvtzs x11, h1
2367-
; CHECK-NEXT: mov z2.h, z0.h[2]
2368-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
2370+
; CHECK-NEXT: mov z1.h, z1.h[2]
2371+
; CHECK-NEXT: mov z2.h, z0.h[1]
2372+
; CHECK-NEXT: stp x12, x8, [sp, #64]
23692373
; CHECK-NEXT: fcvtzs x12, h3
2370-
; CHECK-NEXT: stp x13, x8, [sp]
2371-
; CHECK-NEXT: fcvtzs x8, h5
2372-
; CHECK-NEXT: stp x11, x9, [sp, #16]
2373-
; CHECK-NEXT: fcvtzs x9, h2
2374-
; CHECK-NEXT: mov z1.h, z0.h[1]
2375-
; CHECK-NEXT: mov z2.h, z0.h[3]
2376-
; CHECK-NEXT: fcvtzs x11, h0
2374+
; CHECK-NEXT: fcvtzs x8, h4
2375+
; CHECK-NEXT: stp x10, x9, [sp, #80]
2376+
; CHECK-NEXT: fcvtzs x9, h1
2377+
; CHECK-NEXT: mov z3.h, z0.h[3]
2378+
; CHECK-NEXT: fcvtzs x10, h0
23772379
; CHECK-NEXT: mov z0.h, z0.h[2]
2378-
; CHECK-NEXT: stp x10, x12, [sp, #96]
2379-
; CHECK-NEXT: ldp q3, q4, [sp]
2380-
; CHECK-NEXT: fcvtzs x10, h1
2381-
; CHECK-NEXT: fcvtzs x12, h2
2382-
; CHECK-NEXT: stp x9, x8, [sp, #112]
2380+
; CHECK-NEXT: stp x11, x12, [sp, #32]
2381+
; CHECK-NEXT: fcvtzs x11, h2
2382+
; CHECK-NEXT: fcvtzs x12, h3
2383+
; CHECK-NEXT: stp x9, x8, [sp, #48]
23832384
; CHECK-NEXT: fcvtzs x8, h0
2384-
; CHECK-NEXT: ldp q0, q1, [sp, #32]
2385-
; CHECK-NEXT: ldp q6, q7, [sp, #96]
2386-
; CHECK-NEXT: stp x11, x10, [sp, #64]
2387-
; CHECK-NEXT: stp x8, x12, [sp, #80]
2388-
; CHECK-NEXT: ldp q5, q2, [sp, #64]
2389-
; CHECK-NEXT: stp q0, q1, [x1]
2390-
; CHECK-NEXT: stp q3, q4, [x1, #32]
2391-
; CHECK-NEXT: stp q6, q7, [x1, #64]
2392-
; CHECK-NEXT: stp q5, q2, [x1, #96]
2385+
; CHECK-NEXT: ldp q0, q1, [sp]
2386+
; CHECK-NEXT: ldp q3, q4, [sp, #64]
2387+
; CHECK-NEXT: stp x10, x11, [sp, #96]
2388+
; CHECK-NEXT: ldp q6, q7, [sp, #32]
2389+
; CHECK-NEXT: stp x8, x12, [sp, #112]
2390+
; CHECK-NEXT: ldp q5, q2, [sp, #96]
2391+
; CHECK-NEXT: stp q0, q1, [x1, #32]
2392+
; CHECK-NEXT: stp q6, q7, [x1]
2393+
; CHECK-NEXT: stp q3, q4, [x1, #96]
2394+
; CHECK-NEXT: stp q5, q2, [x1, #64]
23932395
; CHECK-NEXT: add sp, sp, #128
23942396
; CHECK-NEXT: ret
23952397
;

0 commit comments

Comments
 (0)