Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit c2ec04c

Browse files
committed
[X86] Add patterns for vector and/or/xor/andn with other types than vXi64.
This makes fast isel treat all legal vector types the same way. Previously only vXi64 was in the fast-isel tables. This unfortunately prevents matching of andn by fast-isel for these types since the requires SelectionDAG. But we already had this issue for vXi64. So at least we're consistent now. Interestinly it looks like fast-isel can't handle instructions with constant vector arguments so the the not part of the andn patterns is selected with SelectionDAG. This explains why VPTERNLOG shows up in some of the tests. This is a subset of D53268. As I make progress on that, I will try to reduce the number of lines in the tablegen files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344884 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c7a8ddb commit c2ec04c

File tree

5 files changed

+222
-6
lines changed

5 files changed

+222
-6
lines changed

lib/Target/X86/X86InstrAVX512.td

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5184,6 +5184,94 @@ defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
51845184
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
51855185
SchedWriteVecLogic>;
51865186

5187+
let Predicates = [HasVLX] in {
5188+
def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5189+
(VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5190+
def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5191+
(VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5192+
def : Pat<(v4i32 (and VR128X:$src1, VR128X:$src2)),
5193+
(VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5194+
5195+
def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5196+
(VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5197+
def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5198+
(VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5199+
def : Pat<(v4i32 (or VR128X:$src1, VR128X:$src2)),
5200+
(VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5201+
5202+
def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5203+
(VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5204+
def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5205+
(VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5206+
def : Pat<(v4i32 (xor VR128X:$src1, VR128X:$src2)),
5207+
(VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5208+
5209+
def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5210+
(VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5211+
def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5212+
(VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5213+
def : Pat<(v4i32 (X86andnp VR128X:$src1, VR128X:$src2)),
5214+
(VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5215+
5216+
def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5217+
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5218+
def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5219+
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5220+
def : Pat<(v8i32 (and VR256X:$src1, VR256X:$src2)),
5221+
(VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5222+
5223+
def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5224+
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5225+
def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5226+
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5227+
def : Pat<(v8i32 (or VR256X:$src1, VR256X:$src2)),
5228+
(VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5229+
5230+
def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5231+
(VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5232+
def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5233+
(VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5234+
def : Pat<(v8i32 (xor VR256X:$src1, VR256X:$src2)),
5235+
(VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5236+
5237+
def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5238+
(VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5239+
def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5240+
(VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5241+
def : Pat<(v8i32 (X86andnp VR256X:$src1, VR256X:$src2)),
5242+
(VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5243+
}
5244+
5245+
let Predicates = [HasAVX512] in {
5246+
def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5247+
(VPANDQZrr VR512:$src1, VR512:$src2)>;
5248+
def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5249+
(VPANDQZrr VR512:$src1, VR512:$src2)>;
5250+
def : Pat<(v16i32 (and VR512:$src1, VR512:$src2)),
5251+
(VPANDQZrr VR512:$src1, VR512:$src2)>;
5252+
5253+
def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5254+
(VPORQZrr VR512:$src1, VR512:$src2)>;
5255+
def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5256+
(VPORQZrr VR512:$src1, VR512:$src2)>;
5257+
def : Pat<(v16i32 (or VR512:$src1, VR512:$src2)),
5258+
(VPORQZrr VR512:$src1, VR512:$src2)>;
5259+
5260+
def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5261+
(VPXORQZrr VR512:$src1, VR512:$src2)>;
5262+
def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5263+
(VPXORQZrr VR512:$src1, VR512:$src2)>;
5264+
def : Pat<(v16i32 (xor VR512:$src1, VR512:$src2)),
5265+
(VPXORQZrr VR512:$src1, VR512:$src2)>;
5266+
5267+
def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5268+
(VPANDNQZrr VR512:$src1, VR512:$src2)>;
5269+
def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5270+
(VPANDNQZrr VR512:$src1, VR512:$src2)>;
5271+
def : Pat<(v16i32 (X86andnp VR512:$src1, VR512:$src2)),
5272+
(VPANDNQZrr VR512:$src1, VR512:$src2)>;
5273+
}
5274+
51875275
//===----------------------------------------------------------------------===//
51885276
// AVX-512 FP arithmetic
51895277
//===----------------------------------------------------------------------===//

lib/Target/X86/X86InstrSSE.td

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2389,15 +2389,72 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
23892389
let isCommutable = 0 in
23902390
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
23912391

2392+
let Predicates = [HasAVX2, NoVLX] in {
2393+
def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2394+
(VPANDYrr VR256:$src1, VR256:$src2)>;
2395+
def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2396+
(VPANDYrr VR256:$src1, VR256:$src2)>;
2397+
def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2398+
(VPANDYrr VR256:$src1, VR256:$src2)>;
2399+
2400+
def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2401+
(VPORYrr VR256:$src1, VR256:$src2)>;
2402+
def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2403+
(VPORYrr VR256:$src1, VR256:$src2)>;
2404+
def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2405+
(VPORYrr VR256:$src1, VR256:$src2)>;
2406+
2407+
def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2408+
(VPXORYrr VR256:$src1, VR256:$src2)>;
2409+
def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2410+
(VPXORYrr VR256:$src1, VR256:$src2)>;
2411+
def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2412+
(VPXORYrr VR256:$src1, VR256:$src2)>;
2413+
2414+
def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2415+
(VPANDNYrr VR256:$src1, VR256:$src2)>;
2416+
def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2417+
(VPANDNYrr VR256:$src1, VR256:$src2)>;
2418+
def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2419+
(VPANDNYrr VR256:$src1, VR256:$src2)>;
2420+
}
2421+
23922422
// If only AVX1 is supported, we need to handle integer operations with
23932423
// floating point instructions since the integer versions aren't available.
23942424
let Predicates = [HasAVX1Only] in {
2425+
def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2426+
(VANDPSYrr VR256:$src1, VR256:$src2)>;
2427+
def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2428+
(VANDPSYrr VR256:$src1, VR256:$src2)>;
2429+
def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2430+
(VANDPSYrr VR256:$src1, VR256:$src2)>;
23952431
def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
23962432
(VANDPSYrr VR256:$src1, VR256:$src2)>;
2433+
2434+
def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2435+
(VORPSYrr VR256:$src1, VR256:$src2)>;
2436+
def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2437+
(VORPSYrr VR256:$src1, VR256:$src2)>;
2438+
def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2439+
(VORPSYrr VR256:$src1, VR256:$src2)>;
23972440
def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
23982441
(VORPSYrr VR256:$src1, VR256:$src2)>;
2442+
2443+
def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2444+
(VXORPSYrr VR256:$src1, VR256:$src2)>;
2445+
def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2446+
(VXORPSYrr VR256:$src1, VR256:$src2)>;
2447+
def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2448+
(VXORPSYrr VR256:$src1, VR256:$src2)>;
23992449
def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
24002450
(VXORPSYrr VR256:$src1, VR256:$src2)>;
2451+
2452+
def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2453+
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
2454+
def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2455+
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
2456+
def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2457+
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
24012458
def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
24022459
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
24032460

@@ -2504,6 +2561,66 @@ let Predicates = [UseSSE2] in {
25042561
FR64)>;
25052562
}
25062563

2564+
let Predicates = [HasAVX, NoVLX] in {
2565+
def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2566+
(VPANDrr VR128:$src1, VR128:$src2)>;
2567+
def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2568+
(VPANDrr VR128:$src1, VR128:$src2)>;
2569+
def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2570+
(VPANDrr VR128:$src1, VR128:$src2)>;
2571+
2572+
def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2573+
(VPORrr VR128:$src1, VR128:$src2)>;
2574+
def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2575+
(VPORrr VR128:$src1, VR128:$src2)>;
2576+
def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2577+
(VPORrr VR128:$src1, VR128:$src2)>;
2578+
2579+
def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2580+
(VPXORrr VR128:$src1, VR128:$src2)>;
2581+
def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2582+
(VPXORrr VR128:$src1, VR128:$src2)>;
2583+
def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2584+
(VPXORrr VR128:$src1, VR128:$src2)>;
2585+
2586+
def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2587+
(VPANDNrr VR128:$src1, VR128:$src2)>;
2588+
def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2589+
(VPANDNrr VR128:$src1, VR128:$src2)>;
2590+
def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2591+
(VPANDNrr VR128:$src1, VR128:$src2)>;
2592+
}
2593+
2594+
let Predicates = [UseSSE2] in {
2595+
def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2596+
(PANDrr VR128:$src1, VR128:$src2)>;
2597+
def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2598+
(PANDrr VR128:$src1, VR128:$src2)>;
2599+
def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2600+
(PANDrr VR128:$src1, VR128:$src2)>;
2601+
2602+
def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2603+
(PORrr VR128:$src1, VR128:$src2)>;
2604+
def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2605+
(PORrr VR128:$src1, VR128:$src2)>;
2606+
def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2607+
(PORrr VR128:$src1, VR128:$src2)>;
2608+
2609+
def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2610+
(PXORrr VR128:$src1, VR128:$src2)>;
2611+
def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2612+
(PXORrr VR128:$src1, VR128:$src2)>;
2613+
def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2614+
(PXORrr VR128:$src1, VR128:$src2)>;
2615+
2616+
def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2617+
(PANDNrr VR128:$src1, VR128:$src2)>;
2618+
def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2619+
(PANDNrr VR128:$src1, VR128:$src2)>;
2620+
def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2621+
(PANDNrr VR128:$src1, VR128:$src2)>;
2622+
}
2623+
25072624
// Patterns for packed operations when we don't have integer type available.
25082625
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
25092626
(ANDPSrr VR128:$src1, VR128:$src2)>;

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,10 @@ define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) no
8585
define <8 x float> @test_mm256_andnot_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
8686
; CHECK-LABEL: test_mm256_andnot_ps:
8787
; CHECK: # %bb.0:
88-
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0
88+
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
89+
; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
90+
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
91+
; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0
8992
; CHECK-NEXT: ret{{[l|q]}}
9093
%1 = bitcast <8 x float> %a0 to <8 x i32>
9194
%2 = bitcast <8 x float> %a1 to <8 x i32>

test/CodeGen/X86/sse-intrinsics-fast-isel.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,15 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
7979
;
8080
; AVX1-LABEL: test_mm_andnot_ps:
8181
; AVX1: # %bb.0:
82-
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
82+
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
83+
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
84+
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
8385
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8486
;
8587
; AVX512-LABEL: test_mm_andnot_ps:
8688
; AVX512: # %bb.0:
87-
; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
89+
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
90+
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
8891
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8992
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
9093
%arg1 = bitcast <4 x float> %a1 to <4 x i32>

test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,17 +272,22 @@ define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
272272
define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
273273
; SSE-LABEL: test_mm_andnot_pd:
274274
; SSE: # %bb.0:
275-
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
275+
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
276+
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
277+
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
276278
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
277279
;
278280
; AVX1-LABEL: test_mm_andnot_pd:
279281
; AVX1: # %bb.0:
280-
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
282+
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
283+
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
284+
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
281285
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
282286
;
283287
; AVX512-LABEL: test_mm_andnot_pd:
284288
; AVX512: # %bb.0:
285-
; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
289+
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
290+
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
286291
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
287292
%arg0 = bitcast <2 x double> %a0 to <4 x i32>
288293
%arg1 = bitcast <2 x double> %a1 to <4 x i32>

0 commit comments

Comments
 (0)