@@ -2373,3 +2373,170 @@ define <vscale x 1 x i8> @mgather_baseidx_zext_nxv1i1_nxv1i8(ptr %base, <vscale
2373
2373
%v = call <vscale x 1 x i8 > @llvm.masked.gather.nxv1i8.nxv1p0 (<vscale x 1 x ptr > %ptrs , i32 1 , <vscale x 1 x i1 > %m , <vscale x 1 x i8 > %passthru )
2374
2374
ret <vscale x 1 x i8 > %v
2375
2375
}
2376
+
2377
+ ; Gather through a GEP into [256 x i32] rows with a scalar (signext) row index
+ ; and a vector element index: addr[i] = base + index*1024 + vecidx[i]*4.
+ ; RV32 materializes this with vsll.vi 10 / vsll.vi 2 + vadd; RV64 widens to
+ ; e64 indices using vwmaccsu/vwmaccus by 1024 and 4 before vluxei64.
+ ; NOTE(review): CHECK lines appear autogenerated (update_llc_test_checks.py
+ ; style) -- regenerate rather than hand-edit; verify against the script header.
+ define <4 x i32 > @scalar_prefix (ptr %base , i32 signext %index , <4 x i32 > %vecidx ) {
2378
+ ; RV32-LABEL: scalar_prefix:
2379
+ ; RV32: # %bb.0:
2380
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2381
+ ; RV32-NEXT: vmv.v.x v9, a1
2382
+ ; RV32-NEXT: vsll.vi v9, v9, 10
2383
+ ; RV32-NEXT: vadd.vx v9, v9, a0
2384
+ ; RV32-NEXT: vsll.vi v8, v8, 2
2385
+ ; RV32-NEXT: vadd.vv v8, v9, v8
2386
+ ; RV32-NEXT: vluxei32.v v8, (zero), v8
2387
+ ; RV32-NEXT: ret
2388
+ ;
2389
+ ; RV64-LABEL: scalar_prefix:
2390
+ ; RV64: # %bb.0:
2391
+ ; RV64-NEXT: li a2, 1024
2392
+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2393
+ ; RV64-NEXT: vmv.v.x v10, a0
2394
+ ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2395
+ ; RV64-NEXT: vmv.v.x v9, a2
2396
+ ; RV64-NEXT: vwmaccsu.vx v10, a1, v9
2397
+ ; RV64-NEXT: li a0, 4
2398
+ ; RV64-NEXT: vwmaccus.vx v10, a0, v8
2399
+ ; RV64-NEXT: vluxei64.v v8, (zero), v10
2400
+ ; RV64-NEXT: ret
2401
+ %gep = getelementptr [256 x i32 ], ptr %base , i32 %index , <4 x i32 > %vecidx
2402
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2403
+ ret <4 x i32 > %res
2404
+ }
2405
+
2406
+ ; Same addressing as scalar_prefix, but the row index arrives as an explicit
+ ; insertelement+shufflevector splat vector instead of a scalar GEP operand.
+ ; Expected codegen is identical to the scalar form, checking the splat is
+ ; recognized and folded back to scalar index arithmetic.
+ define <4 x i32 > @scalar_prefix_with_splat (ptr %base , i32 %index , <4 x i32 > %vecidx ) {
2407
+ ; RV32-LABEL: scalar_prefix_with_splat:
2408
+ ; RV32: # %bb.0:
2409
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2410
+ ; RV32-NEXT: vmv.v.x v9, a1
2411
+ ; RV32-NEXT: vsll.vi v9, v9, 10
2412
+ ; RV32-NEXT: vadd.vx v9, v9, a0
2413
+ ; RV32-NEXT: vsll.vi v8, v8, 2
2414
+ ; RV32-NEXT: vadd.vv v8, v9, v8
2415
+ ; RV32-NEXT: vluxei32.v v8, (zero), v8
2416
+ ; RV32-NEXT: ret
2417
+ ;
2418
+ ; RV64-LABEL: scalar_prefix_with_splat:
2419
+ ; RV64: # %bb.0:
2420
+ ; RV64-NEXT: li a2, 1024
2421
+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2422
+ ; RV64-NEXT: vmv.v.x v10, a0
2423
+ ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2424
+ ; RV64-NEXT: vmv.v.x v9, a2
2425
+ ; RV64-NEXT: vwmaccsu.vx v10, a1, v9
2426
+ ; RV64-NEXT: li a0, 4
2427
+ ; RV64-NEXT: vwmaccus.vx v10, a0, v8
2428
+ ; RV64-NEXT: vluxei64.v v8, (zero), v10
2429
+ ; RV64-NEXT: ret
2430
+ %broadcast.splatinsert = insertelement <4 x i32 > poison, i32 %index , i32 0
2431
+ %broadcast.splat = shufflevector <4 x i32 > %broadcast.splatinsert , <4 x i32 > poison, <4 x i32 > zeroinitializer
2432
+
2433
+ %gep = getelementptr [256 x i32 ], ptr %base , <4 x i32 > %broadcast.splat , <4 x i32 > %vecidx
2434
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2435
+ ret <4 x i32 > %res
2436
+ }
2437
+
2438
+ ; Constant-splat row index (splat 20): the row offset 20*1024 = 20480 bytes is
+ ; folded into the scalar base (RV32/RV64: lui a1, 5 == 20480; add a0, a0, a1),
+ ; leaving only vecidx*4 as the vector index for the indexed load.
+ define <4 x i32 > @scalar_prefix_with_constant_splat (ptr %base , <4 x i32 > %vecidx ) {
2439
+ ; RV32-LABEL: scalar_prefix_with_constant_splat:
2440
+ ; RV32: # %bb.0:
2441
+ ; RV32-NEXT: lui a1, 5
2442
+ ; RV32-NEXT: add a0, a0, a1
2443
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2444
+ ; RV32-NEXT: vsll.vi v8, v8, 2
2445
+ ; RV32-NEXT: vluxei32.v v8, (a0), v8
2446
+ ; RV32-NEXT: ret
2447
+ ;
2448
+ ; RV64-LABEL: scalar_prefix_with_constant_splat:
2449
+ ; RV64: # %bb.0:
2450
+ ; RV64-NEXT: li a1, 4
2451
+ ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2452
+ ; RV64-NEXT: vwmulsu.vx v10, v8, a1
2453
+ ; RV64-NEXT: lui a1, 5
2454
+ ; RV64-NEXT: add a0, a0, a1
2455
+ ; RV64-NEXT: vluxei64.v v8, (a0), v10
2456
+ ; RV64-NEXT: ret
2457
+ %gep = getelementptr [256 x i32 ], ptr %base , <4 x i32 > splat (i32 20 ), <4 x i32 > %vecidx
2458
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2459
+ ret <4 x i32 > %res
2460
+ }
2461
+
2462
+ ; GEP operand order swapped relative to scalar_prefix: vector row index
+ ; (%vecidx) first, scalar element index (%index) last. Address math is
+ ; vecidx*1024 + index*4 + base, so codegen must reassociate the scalar term
+ ; (RV32: vsll.vi 10 on v8, splat+vsll.vi 2 for index; RV64: vwmaccus by 1024
+ ; on vecidx, vwmaccsu of index against an immediate-4 splat).
+ define <4 x i32 > @reassociate (ptr %base , i32 %index , <4 x i32 > %vecidx ) {
2463
+ ; RV32-LABEL: reassociate:
2464
+ ; RV32: # %bb.0:
2465
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2466
+ ; RV32-NEXT: vsll.vi v8, v8, 10
2467
+ ; RV32-NEXT: vmv.v.x v9, a1
2468
+ ; RV32-NEXT: vadd.vx v8, v8, a0
2469
+ ; RV32-NEXT: vsll.vi v9, v9, 2
2470
+ ; RV32-NEXT: vadd.vv v8, v8, v9
2471
+ ; RV32-NEXT: vluxei32.v v8, (zero), v8
2472
+ ; RV32-NEXT: ret
2473
+ ;
2474
+ ; RV64-LABEL: reassociate:
2475
+ ; RV64: # %bb.0:
2476
+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2477
+ ; RV64-NEXT: vmv.v.x v10, a0
2478
+ ; RV64-NEXT: li a0, 1024
2479
+ ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2480
+ ; RV64-NEXT: vwmaccus.vx v10, a0, v8
2481
+ ; RV64-NEXT: vmv.v.i v8, 4
2482
+ ; RV64-NEXT: vwmaccsu.vx v10, a1, v8
2483
+ ; RV64-NEXT: vluxei64.v v8, (zero), v10
2484
+ ; RV64-NEXT: ret
2485
+ %gep = getelementptr [256 x i32 ], ptr %base , <4 x i32 > %vecidx , i32 %index
2486
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2487
+ ret <4 x i32 > %res
2488
+ }
2489
+
2490
+ ; Same as reassociate, but the trailing scalar element index is materialized
+ ; as an insertelement+shufflevector splat vector. Expected codegen matches
+ ; the scalar-operand form, checking the splat last-index is folded.
+ define <4 x i32 > @reassociate_with_splat (ptr %base , i32 %index , <4 x i32 > %vecidx ) {
2491
+ ; RV32-LABEL: reassociate_with_splat:
2492
+ ; RV32: # %bb.0:
2493
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2494
+ ; RV32-NEXT: vmv.v.x v9, a1
2495
+ ; RV32-NEXT: vsll.vi v8, v8, 10
2496
+ ; RV32-NEXT: vadd.vx v8, v8, a0
2497
+ ; RV32-NEXT: vsll.vi v9, v9, 2
2498
+ ; RV32-NEXT: vadd.vv v8, v8, v9
2499
+ ; RV32-NEXT: vluxei32.v v8, (zero), v8
2500
+ ; RV32-NEXT: ret
2501
+ ;
2502
+ ; RV64-LABEL: reassociate_with_splat:
2503
+ ; RV64: # %bb.0:
2504
+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2505
+ ; RV64-NEXT: vmv.v.x v10, a0
2506
+ ; RV64-NEXT: li a0, 1024
2507
+ ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2508
+ ; RV64-NEXT: vwmaccus.vx v10, a0, v8
2509
+ ; RV64-NEXT: vmv.v.i v8, 4
2510
+ ; RV64-NEXT: vwmaccsu.vx v10, a1, v8
2511
+ ; RV64-NEXT: vluxei64.v v8, (zero), v10
2512
+ ; RV64-NEXT: ret
2513
+ %broadcast.splatinsert = insertelement <4 x i32 > poison, i32 %index , i32 0
2514
+ %broadcast.splat = shufflevector <4 x i32 > %broadcast.splatinsert , <4 x i32 > poison, <4 x i32 > zeroinitializer
2515
+
2516
+ %gep = getelementptr [256 x i32 ], ptr %base , <4 x i32 > %vecidx , <4 x i32 > %broadcast.splat
2517
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2518
+ ret <4 x i32 > %res
2519
+ }
2520
+
2521
+ ; Constant-splat trailing element index (splat 20): the 20*4 = 80-byte offset
+ ; folds into the scalar base (addi a0, a0, 80 on both targets), leaving only
+ ; vecidx*1024 as the vector index. %index is unused in the body.
+ define <4 x i32 > @reassociate_with_constant_splat (ptr %base , i32 %index , <4 x i32 > %vecidx ) {
2522
+ ; RV32-LABEL: reassociate_with_constant_splat:
2523
+ ; RV32: # %bb.0:
2524
+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2525
+ ; RV32-NEXT: vsll.vi v8, v8, 10
2526
+ ; RV32-NEXT: addi a0, a0, 80
2527
+ ; RV32-NEXT: vluxei32.v v8, (a0), v8
2528
+ ; RV32-NEXT: ret
2529
+ ;
2530
+ ; RV64-LABEL: reassociate_with_constant_splat:
2531
+ ; RV64: # %bb.0:
2532
+ ; RV64-NEXT: li a1, 1024
2533
+ ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2534
+ ; RV64-NEXT: vwmulsu.vx v10, v8, a1
2535
+ ; RV64-NEXT: addi a0, a0, 80
2536
+ ; RV64-NEXT: vluxei64.v v8, (a0), v10
2537
+ ; RV64-NEXT: ret
2538
+ %gep = getelementptr [256 x i32 ], ptr %base , <4 x i32 > %vecidx , <4 x i32 > splat (i32 20 )
2539
+ %res = call <4 x i32 > @llvm.masked.gather.v4i32.v4p0 (<4 x ptr > %gep , i32 4 , <4 x i1 > <i1 true , i1 true , i1 true , i1 true >, <4 x i32 > undef )
2540
+ ret <4 x i32 > %res
2541
+ }
2542
+
0 commit comments