@@ -2389,15 +2389,72 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
2389
2389
// Packed FP and-not: instantiates sse12_fp_packed_logical with opcode 0x55,
// mnemonic "andn" and the X86andnp node, scheduled as SchedWriteFLogic.
// isCommutable is explicitly cleared for this defm.
// NOTE(review): presumably because and-not is not symmetric in its two
// operands -- confirm against the X86andnp node definition.
let isCommutable = 0 in
2390
2390
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
2391
2391
2392
// 256-bit integer logic patterns, active when both HasAVX2 and NoVLX hold.
// Each of and / or / xor / X86andnp on two VR256 register operands is mapped
// to the register-register integer instruction (VPANDYrr, VPORYrr, VPXORYrr,
// VPANDNYrr) for the element types v32i8, v16i16 and v8i32. The same
// instruction is used for every element type of a given operation.
// NOTE(review): v4i64 is not listed in this block; presumably it is matched
// by patterns defined elsewhere -- confirm.
+ let Predicates = [HasAVX2, NoVLX] in {
2393
// and -> VPANDYrr
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2394
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
2395
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2396
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
2397
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2398
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
2399
+
2400
// or -> VPORYrr
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2401
+ (VPORYrr VR256:$src1, VR256:$src2)>;
2402
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2403
+ (VPORYrr VR256:$src1, VR256:$src2)>;
2404
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2405
+ (VPORYrr VR256:$src1, VR256:$src2)>;
2406
+
2407
// xor -> VPXORYrr
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2408
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
2409
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2410
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
2411
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2412
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
2413
+
2414
// X86andnp -> VPANDNYrr (operand order is passed through unchanged)
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2415
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
2416
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2417
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
2418
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2419
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
2420
+ }
2421
+
2392
2422
// If only AVX1 is supported, we need to handle integer operations with
2393
2423
// floating point instructions since the integer versions aren't available.
2394
2424
let Predicates = [HasAVX1Only] in {
2425
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2426
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
2427
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2428
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
2429
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2430
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
2395
2431
def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
2396
2432
(VANDPSYrr VR256:$src1, VR256:$src2)>;
2433
+
2434
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2435
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
2436
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2437
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
2438
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2439
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
2397
2440
def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
2398
2441
(VORPSYrr VR256:$src1, VR256:$src2)>;
2442
+
2443
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2444
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
2445
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2446
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
2447
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2448
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
2399
2449
def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
2400
2450
(VXORPSYrr VR256:$src1, VR256:$src2)>;
2451
+
2452
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2453
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2454
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2455
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2456
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2457
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2401
2458
def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
2402
2459
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
2403
2460
@@ -2504,6 +2561,66 @@ let Predicates = [UseSSE2] in {
2504
2561
FR64)>;
2505
2562
}
2506
2563
2564
// 128-bit integer logic patterns, active when both HasAVX and NoVLX hold.
// Each of and / or / xor / X86andnp on two VR128 register operands is mapped
// to the VEX-prefixed-named register-register instruction (VPANDrr, VPORrr,
// VPXORrr, VPANDNrr) for the element types v16i8, v8i16 and v4i32.
// NOTE(review): v2i64 is not listed in this block; presumably it is matched
// by patterns defined elsewhere -- confirm.
+ let Predicates = [HasAVX, NoVLX] in {
2565
// and -> VPANDrr
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2566
+ (VPANDrr VR128:$src1, VR128:$src2)>;
2567
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2568
+ (VPANDrr VR128:$src1, VR128:$src2)>;
2569
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2570
+ (VPANDrr VR128:$src1, VR128:$src2)>;
2571
+
2572
// or -> VPORrr
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2573
+ (VPORrr VR128:$src1, VR128:$src2)>;
2574
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2575
+ (VPORrr VR128:$src1, VR128:$src2)>;
2576
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2577
+ (VPORrr VR128:$src1, VR128:$src2)>;
2578
+
2579
// xor -> VPXORrr
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2580
+ (VPXORrr VR128:$src1, VR128:$src2)>;
2581
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2582
+ (VPXORrr VR128:$src1, VR128:$src2)>;
2583
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2584
+ (VPXORrr VR128:$src1, VR128:$src2)>;
2585
+
2586
// X86andnp -> VPANDNrr (operand order is passed through unchanged)
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2587
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
2588
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2589
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
2590
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2591
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
2592
+ }
2593
+
2594
// 128-bit integer logic patterns, active when the UseSSE2 predicate holds.
// Each of and / or / xor / X86andnp on two VR128 register operands is mapped
// to the register-register instruction (PANDrr, PORrr, PXORrr, PANDNrr) for
// the element types v16i8, v8i16 and v4i32.
// NOTE(review): v2i64 is not listed in this block; presumably it is matched
// by patterns defined elsewhere -- confirm.
+ let Predicates = [UseSSE2] in {
2595
// and -> PANDrr
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2596
+ (PANDrr VR128:$src1, VR128:$src2)>;
2597
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2598
+ (PANDrr VR128:$src1, VR128:$src2)>;
2599
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2600
+ (PANDrr VR128:$src1, VR128:$src2)>;
2601
+
2602
// or -> PORrr
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2603
+ (PORrr VR128:$src1, VR128:$src2)>;
2604
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2605
+ (PORrr VR128:$src1, VR128:$src2)>;
2606
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2607
+ (PORrr VR128:$src1, VR128:$src2)>;
2608
+
2609
// xor -> PXORrr
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2610
+ (PXORrr VR128:$src1, VR128:$src2)>;
2611
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2612
+ (PXORrr VR128:$src1, VR128:$src2)>;
2613
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2614
+ (PXORrr VR128:$src1, VR128:$src2)>;
2615
+
2616
// X86andnp -> PANDNrr (operand order is passed through unchanged)
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2617
+ (PANDNrr VR128:$src1, VR128:$src2)>;
2618
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2619
+ (PANDNrr VR128:$src1, VR128:$src2)>;
2620
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2621
+ (PANDNrr VR128:$src1, VR128:$src2)>;
2622
+ }
2623
+
2507
2624
// Patterns for packed operations when we don't have integer type available.
2508
2625
// v4f32 X86fand of two VR128 registers is selected as ANDPSrr.
// This pattern is not wrapped in a Predicates block within the visible lines.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
2509
2626
(ANDPSrr VR128:$src1, VR128:$src2)>;
0 commit comments