Skip to content

Commit 199c6ec

Browse files
committed
[RISCV] Add coverage for missed scalarization of gather/scatter base pointers
1 parent 8a5b97a commit 199c6ec

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2373,3 +2373,170 @@ define <vscale x 1 x i8> @mgather_baseidx_zext_nxv1i1_nxv1i8(ptr %base, <vscale
23732373
%v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
23742374
ret <vscale x 1 x i8> %v
23752375
}
2376+
2377+
; Coverage for a gather whose address is a GEP into [256 x i32] (1024 bytes
; per array) with a scalar leading index (%index) and a vector trailing
; index (%vecidx). The gather mask is all-true.
; The assertions below pin the current codegen on both targets: the indexed
; load (vluxei32 / vluxei64) is emitted with a `(zero)` base, i.e. the whole
; address, including the scalar %base and the scalar-index term, is formed
; in vector registers instead of being folded into a scalar base register.
define <4 x i32> @scalar_prefix(ptr %base, i32 signext %index, <4 x i32> %vecidx) {
2378+
; RV32-LABEL: scalar_prefix:
2379+
; RV32: # %bb.0:
2380+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2381+
; RV32-NEXT: vmv.v.x v9, a1
2382+
; RV32-NEXT: vsll.vi v9, v9, 10
2383+
; RV32-NEXT: vadd.vx v9, v9, a0
2384+
; RV32-NEXT: vsll.vi v8, v8, 2
2385+
; RV32-NEXT: vadd.vv v8, v9, v8
2386+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2387+
; RV32-NEXT: ret
2388+
;
2389+
; RV64-LABEL: scalar_prefix:
2390+
; RV64: # %bb.0:
2391+
; RV64-NEXT: li a2, 1024
2392+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2393+
; RV64-NEXT: vmv.v.x v10, a0
2394+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2395+
; RV64-NEXT: vmv.v.x v9, a2
2396+
; RV64-NEXT: vwmaccsu.vx v10, a1, v9
2397+
; RV64-NEXT: li a0, 4
2398+
; RV64-NEXT: vwmaccus.vx v10, a0, v8
2399+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2400+
; RV64-NEXT: ret
2401+
%gep = getelementptr [256 x i32], ptr %base, i32 %index, <4 x i32> %vecidx
2402+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2403+
ret <4 x i32> %res
2404+
}
2405+
2406+
; Variant of the scalar-leading-index gather in which %index is first
; broadcast to a <4 x i32> with the insertelement + shufflevector splat
; idiom, so both GEP indices are vectors (splat of %index, then %vecidx).
; Unlike @scalar_prefix, %index carries no signext attribute here.
; The assertions below pin the current codegen: the same instruction
; sequences as @scalar_prefix, ending in an indexed load with a `(zero)`
; base, i.e. the splatted index is not turned back into a scalar base offset.
define <4 x i32> @scalar_prefix_with_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
2407+
; RV32-LABEL: scalar_prefix_with_splat:
2408+
; RV32: # %bb.0:
2409+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2410+
; RV32-NEXT: vmv.v.x v9, a1
2411+
; RV32-NEXT: vsll.vi v9, v9, 10
2412+
; RV32-NEXT: vadd.vx v9, v9, a0
2413+
; RV32-NEXT: vsll.vi v8, v8, 2
2414+
; RV32-NEXT: vadd.vv v8, v9, v8
2415+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2416+
; RV32-NEXT: ret
2417+
;
2418+
; RV64-LABEL: scalar_prefix_with_splat:
2419+
; RV64: # %bb.0:
2420+
; RV64-NEXT: li a2, 1024
2421+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2422+
; RV64-NEXT: vmv.v.x v10, a0
2423+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2424+
; RV64-NEXT: vmv.v.x v9, a2
2425+
; RV64-NEXT: vwmaccsu.vx v10, a1, v9
2426+
; RV64-NEXT: li a0, 4
2427+
; RV64-NEXT: vwmaccus.vx v10, a0, v8
2428+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2429+
; RV64-NEXT: ret
2430+
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %index, i32 0
2431+
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2432+
2433+
%gep = getelementptr [256 x i32], ptr %base, <4 x i32> %broadcast.splat, <4 x i32> %vecidx
2434+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2435+
ret <4 x i32> %res
2436+
}
2437+
2438+
; Variant in which the leading GEP index is the constant splat (i32 20) and
; the trailing index is the vector %vecidx.
; The assertions below show the constant term being applied to the scalar
; base: `lui 5` materializes 20480 (= 20 * 1024 bytes, one [256 x i32] array
; being 1024 bytes), it is added to a0, and the indexed load then uses `(a0)`
; as its base with only the scaled %vecidx in the index vector.
define <4 x i32> @scalar_prefix_with_constant_splat(ptr %base, <4 x i32> %vecidx) {
2439+
; RV32-LABEL: scalar_prefix_with_constant_splat:
2440+
; RV32: # %bb.0:
2441+
; RV32-NEXT: lui a1, 5
2442+
; RV32-NEXT: add a0, a0, a1
2443+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2444+
; RV32-NEXT: vsll.vi v8, v8, 2
2445+
; RV32-NEXT: vluxei32.v v8, (a0), v8
2446+
; RV32-NEXT: ret
2447+
;
2448+
; RV64-LABEL: scalar_prefix_with_constant_splat:
2449+
; RV64: # %bb.0:
2450+
; RV64-NEXT: li a1, 4
2451+
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2452+
; RV64-NEXT: vwmulsu.vx v10, v8, a1
2453+
; RV64-NEXT: lui a1, 5
2454+
; RV64-NEXT: add a0, a0, a1
2455+
; RV64-NEXT: vluxei64.v v8, (a0), v10
2456+
; RV64-NEXT: ret
2457+
%gep = getelementptr [256 x i32], ptr %base, <4 x i32> splat (i32 20), <4 x i32> %vecidx
2458+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2459+
ret <4 x i32> %res
2460+
}
2461+
2462+
; Coverage for the opposite index order: the leading GEP index is the vector
; %vecidx (scaled by the 1024-byte [256 x i32] array size) and the trailing
; index is the scalar %index (scaled by 4). Hoisting the scalar term into a
; scalar base would require reassociating the address computation.
; The assertions below pin the current codegen: the indexed load uses a
; `(zero)` base on both targets, so the whole address is formed in vector
; registers.
define <4 x i32> @reassociate(ptr %base, i32 %index, <4 x i32> %vecidx) {
2463+
; RV32-LABEL: reassociate:
2464+
; RV32: # %bb.0:
2465+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2466+
; RV32-NEXT: vsll.vi v8, v8, 10
2467+
; RV32-NEXT: vmv.v.x v9, a1
2468+
; RV32-NEXT: vadd.vx v8, v8, a0
2469+
; RV32-NEXT: vsll.vi v9, v9, 2
2470+
; RV32-NEXT: vadd.vv v8, v8, v9
2471+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2472+
; RV32-NEXT: ret
2473+
;
2474+
; RV64-LABEL: reassociate:
2475+
; RV64: # %bb.0:
2476+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2477+
; RV64-NEXT: vmv.v.x v10, a0
2478+
; RV64-NEXT: li a0, 1024
2479+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2480+
; RV64-NEXT: vwmaccus.vx v10, a0, v8
2481+
; RV64-NEXT: vmv.v.i v8, 4
2482+
; RV64-NEXT: vwmaccsu.vx v10, a1, v8
2483+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2484+
; RV64-NEXT: ret
2485+
%gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, i32 %index
2486+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2487+
ret <4 x i32> %res
2488+
}
2489+
2490+
; Variant of the vector-leading-index gather in which the trailing index is
; an explicit insertelement + shufflevector splat of the scalar %index, so
; both GEP indices are vectors (%vecidx, then splat of %index).
; The assertions below pin the current codegen: the splat is kept as a
; vector operand and the indexed load uses a `(zero)` base on both targets.
define <4 x i32> @reassociate_with_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
2491+
; RV32-LABEL: reassociate_with_splat:
2492+
; RV32: # %bb.0:
2493+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2494+
; RV32-NEXT: vmv.v.x v9, a1
2495+
; RV32-NEXT: vsll.vi v8, v8, 10
2496+
; RV32-NEXT: vadd.vx v8, v8, a0
2497+
; RV32-NEXT: vsll.vi v9, v9, 2
2498+
; RV32-NEXT: vadd.vv v8, v8, v9
2499+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2500+
; RV32-NEXT: ret
2501+
;
2502+
; RV64-LABEL: reassociate_with_splat:
2503+
; RV64: # %bb.0:
2504+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2505+
; RV64-NEXT: vmv.v.x v10, a0
2506+
; RV64-NEXT: li a0, 1024
2507+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2508+
; RV64-NEXT: vwmaccus.vx v10, a0, v8
2509+
; RV64-NEXT: vmv.v.i v8, 4
2510+
; RV64-NEXT: vwmaccsu.vx v10, a1, v8
2511+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2512+
; RV64-NEXT: ret
2513+
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %index, i32 0
2514+
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2515+
2516+
%gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, <4 x i32> %broadcast.splat
2517+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2518+
ret <4 x i32> %res
2519+
}
2520+
2521+
; Variant in which the leading GEP index is the vector %vecidx and the
; trailing index is the constant splat (i32 20).
; The assertions below show the constant term being folded into the scalar
; base: `addi a0, a0, 80` adds 20 * 4 bytes, and the indexed load then uses
; `(a0)` as its base with only %vecidx scaled by 1024 in the index vector.
; NOTE(review): the %index parameter is not referenced anywhere in the body;
; it may have been kept only for signature parity with the sibling tests.
define <4 x i32> @reassociate_with_constant_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
2522+
; RV32-LABEL: reassociate_with_constant_splat:
2523+
; RV32: # %bb.0:
2524+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2525+
; RV32-NEXT: vsll.vi v8, v8, 10
2526+
; RV32-NEXT: addi a0, a0, 80
2527+
; RV32-NEXT: vluxei32.v v8, (a0), v8
2528+
; RV32-NEXT: ret
2529+
;
2530+
; RV64-LABEL: reassociate_with_constant_splat:
2531+
; RV64: # %bb.0:
2532+
; RV64-NEXT: li a1, 1024
2533+
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2534+
; RV64-NEXT: vwmulsu.vx v10, v8, a1
2535+
; RV64-NEXT: addi a0, a0, 80
2536+
; RV64-NEXT: vluxei64.v v8, (a0), v10
2537+
; RV64-NEXT: ret
2538+
%gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, <4 x i32> splat (i32 20)
2539+
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2540+
ret <4 x i32> %res
2541+
}
2542+

0 commit comments

Comments
 (0)