@@ -2352,12 +2352,249 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
 	}
 }
 
+/* A load/store pair that forms a memory copy should look like the following:
+ *
+ *   ld_width R, [addr_src + offset_src]
+ *   st_width [addr_dest + offset_dest], R
+ *
+ * The destination register of the load and the source register of the store
+ * must be the same, and the load and store must also be performed at the same
+ * width. If either addr_src or addr_dest is the stack pointer, we don't do
+ * the CPP optimization as the stack is modelled by registers on NFP.
+ */
+static bool
+curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
+		    struct nfp_insn_meta *st_meta)
+{
+	struct bpf_insn *ld = &ld_meta->insn;
+	struct bpf_insn *st = &st_meta->insn;
+
+	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
+		return false;
+
+	if (ld_meta->ptr.type != PTR_TO_PACKET)
+		return false;
+
+	if (st_meta->ptr.type != PTR_TO_PACKET)
+		return false;
+
+	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
+		return false;
+
+	if (ld->dst_reg != st->src_reg)
+		return false;
+
+	/* There is a jump to the store insn in this pair. */
+	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
+		return false;
+
+	return true;
+}
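
As an illustration (not part of the patch), a pair that curr_pair_is_memcpy() accepts could look like the following eBPF bytecode, assuming the verifier has marked both r8 and r9 as PTR_TO_PACKET; the registers and offsets here are hypothetical:

	r0 = *(u32 *)(r8 + 0)	/* BPF_LDX, width BPF_W, dst_reg = r0, src_reg = r8 */
	*(u32 *)(r9 + 0) = r0	/* BPF_STX, width BPF_W, src_reg = r0, dst_reg = r9 */

Both instructions use the same width, the load's dst_reg matches the store's src_reg, and neither address register is the stack pointer, so the pair qualifies.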
+
+/* Currently, we only support chaining load/store pairs if:
+ *
+ * - Their address base registers are the same.
+ * - Their address offsets are in the same order.
+ * - They operate at the same memory width.
+ * - There is no jump into the middle of them.
+ */
+static bool
+curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
+			      struct nfp_insn_meta *st_meta,
+			      struct bpf_insn *prev_ld,
+			      struct bpf_insn *prev_st)
+{
+	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
+	struct bpf_insn *ld = &ld_meta->insn;
+	struct bpf_insn *st = &st_meta->insn;
+	s16 prev_ld_off, prev_st_off;
+
+	/* This pair is the start pair. */
+	if (!prev_ld)
+		return true;
+
+	prev_size = BPF_LDST_BYTES(prev_ld);
+	curr_size = BPF_LDST_BYTES(ld);
+	prev_ld_base = prev_ld->src_reg;
+	prev_st_base = prev_st->dst_reg;
+	prev_ld_dst = prev_ld->dst_reg;
+	prev_ld_off = prev_ld->off;
+	prev_st_off = prev_st->off;
+
+	if (ld->dst_reg != prev_ld_dst)
+		return false;
+
+	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
+		return false;
+
+	if (curr_size != prev_size)
+		return false;
+
+	/* There is a jump to the head of this pair. */
+	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
+		return false;
+
+	/* Both in ascending order. */
+	if (prev_ld_off + prev_size == ld->off &&
+	    prev_st_off + prev_size == st->off)
+		return true;
+
+	/* Both in descending order. */
+	if (ld->off + curr_size == prev_ld_off &&
+	    st->off + curr_size == prev_st_off)
+		return true;
+
+	return false;
+}
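
Under these rules, a chain of three 4-byte pairs in ascending order might look as follows (again a hypothetical sketch). Each step satisfies prev_ld_off + prev_size == ld->off and prev_st_off + prev_size == st->off, so curr_pair_chain_with_previous() keeps returning true:

	r0 = *(u32 *)(r8 + 0)
	*(u32 *)(r9 + 0) = r0
	r0 = *(u32 *)(r8 + 4)	/* 0 + 4 == 4 for both load and store offsets */
	*(u32 *)(r9 + 4) = r0
	r0 = *(u32 *)(r8 + 8)	/* 4 + 4 == 8 for both load and store offsets */
	*(u32 *)(r9 + 8) = r0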
+
+/* Return TRUE if a cross memory access happens. A cross memory access means
+ * the store area overlaps with the load area, so a later load might read the
+ * value written by a previous store; in that case we can't treat the sequence
+ * as a memory copy.
+ */
+static bool
+cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
+		 struct nfp_insn_meta *head_st_meta)
+{
+	s16 head_ld_off, head_st_off, ld_off;
+
+	/* Different pointer types do not overlap. */
+	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
+		return false;
+
+	/* Load and store are both PTR_TO_PACKET, check ID info. */
+	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
+		return true;
+
+	/* Canonicalize the offsets. Turn all of them against the original
+	 * base register.
+	 */
+	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
+	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
+	ld_off = ld->off + head_ld_meta->ptr.off;
+
+	/* Ascending order cross. */
+	if (ld_off > head_ld_off &&
+	    head_ld_off < head_st_off && ld_off >= head_st_off)
+		return true;
+
+	/* Descending order cross. */
+	if (ld_off < head_ld_off &&
+	    head_ld_off > head_st_off && ld_off <= head_st_off)
+		return true;
+
+	return false;
+}
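
A worked example of the ascending-order cross, assuming both pointers carry the same ID and ptr.off is 0: with head_ld_off = 0 and head_st_off = 8, a later load at ld_off = 8 gives ld_off > head_ld_off, head_ld_off < head_st_off and ld_off >= head_st_off, so that load would read bytes already rewritten by an earlier store of the chain and cross_mem_access() returns true.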
+
+/* This pass tries to identify the following instruction sequences.
+ *
+ *   load R, [regA + offA]
+ *   store [regB + offB], R
+ *   load R, [regA + offA + const_imm_A]
+ *   store [regB + offB + const_imm_A], R
+ *   load R, [regA + offA + 2 * const_imm_A]
+ *   store [regB + offB + 2 * const_imm_A], R
+ *   ...
+ *
+ * The above sequence is typically generated by the compiler when lowering
+ * memcpy. NFP prefers using CPP instructions to accelerate it.
+ */
+static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *head_ld_meta = NULL;
+	struct nfp_insn_meta *head_st_meta = NULL;
+	struct nfp_insn_meta *meta1, *meta2;
+	struct bpf_insn *prev_ld = NULL;
+	struct bpf_insn *prev_st = NULL;
+	u8 count = 0;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		struct bpf_insn *ld = &meta1->insn;
+		struct bpf_insn *st = &meta2->insn;
+
+		/* Reset record status if any of the following is true:
+		 *   - The current insn pair is not load/store.
+		 *   - The load/store pair doesn't chain with the previous one.
+		 *   - The chained load/store pair crossed with the previous
+		 *     pair.
+		 *   - The chained load/store pair has a total memory copy size
+		 *     beyond 128 bytes, which is the maximum length a single
+		 *     NFP CPP command can transfer.
+		 */
+		if (!curr_pair_is_memcpy(meta1, meta2) ||
+		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
+						   prev_st) ||
+		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
+						       head_st_meta) ||
+				      head_ld_meta->ldst_gather_len >= 128))) {
+			if (!count)
+				continue;
+
+			if (count > 1) {
+				s16 prev_ld_off = prev_ld->off;
+				s16 prev_st_off = prev_st->off;
+				s16 head_ld_off = head_ld_meta->insn.off;
+
+				if (prev_ld_off < head_ld_off) {
+					head_ld_meta->insn.off = prev_ld_off;
+					head_st_meta->insn.off = prev_st_off;
+					head_ld_meta->ldst_gather_len =
+						-head_ld_meta->ldst_gather_len;
+				}
+
+				head_ld_meta->paired_st = &head_st_meta->insn;
+				head_st_meta->skip = true;
+			} else {
+				head_ld_meta->ldst_gather_len = 0;
+			}
+
+			/* If the chain is ended by a load/store pair then this
+			 * could serve as the new head of the next chain.
+			 */
+			if (curr_pair_is_memcpy(meta1, meta2)) {
+				head_ld_meta = meta1;
+				head_st_meta = meta2;
+				head_ld_meta->ldst_gather_len =
+					BPF_LDST_BYTES(ld);
+				meta1 = nfp_meta_next(meta1);
+				meta2 = nfp_meta_next(meta2);
+				prev_ld = ld;
+				prev_st = st;
+				count = 1;
+			} else {
+				head_ld_meta = NULL;
+				head_st_meta = NULL;
+				prev_ld = NULL;
+				prev_st = NULL;
+				count = 0;
+			}
+
+			continue;
+		}
+
+		if (!head_ld_meta) {
+			head_ld_meta = meta1;
+			head_st_meta = meta2;
+		} else {
+			meta1->skip = true;
+			meta2->skip = true;
+		}
+
+		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
+		meta1 = nfp_meta_next(meta1);
+		meta2 = nfp_meta_next(meta2);
+		prev_ld = ld;
+		prev_st = st;
+		count++;
+	}
+}
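
For context, the kind of source that ends up as such a ladder is a small fixed-size packet-to-packet copy. The fragment below is a hypothetical XDP example, a minimal sketch rather than anything taken from the patch; clang typically expands the fixed-size __builtin_memcpy() into same-width load/store pairs with stepping offsets, which nfp_bpf_opt_ldst_gather() can then fuse into a single CPP transfer:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_dup_hdr(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* Bounds check so both source and destination stay PTR_TO_PACKET. */
		if (data + 32 > data_end)
			return XDP_DROP;

		/* Copy the first 16 bytes right behind themselves; the source
		 * and destination ranges do not overlap, so there is no cross
		 * memory access.
		 */
		__builtin_memcpy(data + 16, data, 16);
		return XDP_PASS;
	}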
+
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
 
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
+	nfp_bpf_opt_ldst_gather(nfp_prog);
 
 	return 0;
 }