Skip to content

Commit 316f530

Browse files
authored
[X86] getTargetConstantBitsFromNode - handle EXTRACT_SUBVECTOR through bitcasts (#143886)
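Generalize the extraction index and width calculations so they are expressed in units of the requested element size (EltSizeInBits) rather than the subvector's own scalar type, allowing constant bits to be extracted from an EXTRACT_SUBVECTOR source even when the element type changes through bitcasts.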
1 parent 2a905dd commit 316f530

File tree

3 files changed

+50
-59
lines changed

3 files changed

+50
-59
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5242,25 +5242,25 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
52425242
}
52435243

52445244
// Extract constant bits from a subvector's source.
5245-
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
5246-
// TODO - support extract_subvector through bitcasts.
5247-
if (EltSizeInBits != VT.getScalarSizeInBits())
5248-
return false;
5249-
5250-
if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
5251-
UndefElts, EltBits, AllowWholeUndefs,
5252-
AllowPartialUndefs)) {
5253-
EVT SrcVT = Op.getOperand(0).getValueType();
5254-
unsigned NumSrcElts = SrcVT.getVectorNumElements();
5255-
unsigned NumSubElts = VT.getVectorNumElements();
5256-
unsigned BaseIdx = Op.getConstantOperandVal(1);
5257-
UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
5258-
if ((BaseIdx + NumSubElts) != NumSrcElts)
5259-
EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
5260-
if (BaseIdx != 0)
5261-
EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
5262-
return true;
5263-
}
5245+
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5246+
getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, UndefElts,
5247+
EltBits, AllowWholeUndefs,
5248+
AllowPartialUndefs)) {
5249+
EVT SrcVT = Op.getOperand(0).getValueType();
5250+
unsigned NumSrcElts = SrcVT.getSizeInBits() / EltSizeInBits;
5251+
unsigned NumSubElts = VT.getSizeInBits() / EltSizeInBits;
5252+
unsigned BaseOfs = Op.getConstantOperandVal(1) * VT.getScalarSizeInBits();
5253+
unsigned BaseIdx = BaseOfs / EltSizeInBits;
5254+
assert((SrcVT.getSizeInBits() % EltSizeInBits) == 0 &&
5255+
(VT.getSizeInBits() % EltSizeInBits) == 0 &&
5256+
(BaseOfs % EltSizeInBits) == 0 && "Bad subvector index");
5257+
5258+
UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
5259+
if ((BaseIdx + NumSubElts) != NumSrcElts)
5260+
EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
5261+
if (BaseIdx != 0)
5262+
EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
5263+
return true;
52645264
}
52655265

52665266
// Extract constant bits from shuffle node sources.

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3567,14 +3567,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35673567
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
35683568
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
35693569
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3570-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
35713570
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
35723571
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
35733572
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3574-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3573+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
35753574
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
3576-
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
35773575
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3576+
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
35783577
; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
35793578
; AVX-NEXT: retq
35803579
;
@@ -3757,14 +3756,14 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
37573756
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
37583757
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
37593758
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3760-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
3759+
; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
37613760
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3762-
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3763-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3761+
; AVX-NEXT: vpaddb 32(%rdx), %xmm3, %xmm3
3762+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3],xmm0[4],xmm2[5,6],xmm0[7]
37643763
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
37653764
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
37663765
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3767-
; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
3766+
; AVX-NEXT: vmovdqa %xmm3, 32(%rcx)
37683767
; AVX-NEXT: retq
37693768
;
37703769
; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
@@ -3955,10 +3954,9 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
39553954
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
39563955
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
39573956
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3958-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3957+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
39593958
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3960-
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3961-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3959+
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
39623960
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
39633961
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
39643962
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4181,17 +4179,16 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
41814179
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41824180
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
41834181
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5],xmm2[6],xmm1[7]
4184-
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
4185-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
4182+
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
41864183
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
4187-
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2],xmm3[3,4,5,6,7]
4184+
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
41884185
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4189-
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
4190-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
4191-
; AVX-NEXT: vpaddb 16(%rdx), %xmm2, %xmm2
4186+
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4187+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
4188+
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
4189+
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
41924190
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
4193-
; AVX-NEXT: vmovdqa %xmm2, 16(%rcx)
4194-
; AVX-NEXT: vmovdqa %xmm0, 32(%rcx)
4191+
; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
41954192
; AVX-NEXT: retq
41964193
;
41974194
; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
@@ -4379,10 +4376,9 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
43794376
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
43804377
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
43814378
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
4382-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
4379+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
43834380
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4384-
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4385-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
4381+
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
43864382
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
43874383
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
43884384
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4517,10 +4513,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
45174513
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
45184514
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
45194515
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4520-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
45214516
; AVX-NEXT: vmovaps 32(%rdx), %ymm2
45224517
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4523-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
4518+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
45244519
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
45254520
; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
45264521
; AVX-NEXT: vmovdqa %xmm1, (%rcx)

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2868,14 +2868,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
28682868
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
28692869
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
28702870
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2871-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
28722871
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
28732872
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
2874-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
2875-
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
28762873
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
2877-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
2874+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
2875+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
28782876
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
2877+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
28792878
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
28802879
; AVX-NEXT: retq
28812880
;
@@ -2986,7 +2985,8 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
29862985
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
29872986
; AVX-NEXT: vmovdqa (%rdi), %xmm2
29882987
; AVX-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero
2989-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
2988+
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
2989+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2,3],xmm0[4],xmm3[5,6],xmm0[7]
29902990
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
29912991
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
29922992
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -3135,9 +3135,8 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
31353135
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
31363136
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
31373137
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3138-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3139-
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
3140-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3138+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3139+
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
31413140
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
31423141
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
31433142
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
@@ -3319,13 +3318,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
33193318
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
33203319
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
33213320
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
3322-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
33233321
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
3324-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3325-
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
33263322
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3327-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3323+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
3324+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
33283325
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3326+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
33293327
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
33303328
; AVX-NEXT: retq
33313329
;
@@ -3469,9 +3467,8 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
34693467
; AVX-NEXT: vmovdqa (%rdi), %xmm0
34703468
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
34713469
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3472-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
3473-
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
3474-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3470+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
3471+
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
34753472
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
34763473
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
34773474
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
@@ -3584,9 +3581,8 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
35843581
; AVX: # %bb.0:
35853582
; AVX-NEXT: vmovdqa (%rdi), %xmm0
35863583
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
3587-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
35883584
; AVX-NEXT: vmovaps 32(%rsi), %ymm2
3589-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3585+
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
35903586
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
35913587
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
35923588
; AVX-NEXT: vmovaps %ymm2, 32(%rdx)

0 commit comments

Comments
 (0)