Commit 13a51e0

[X86] Fix a crash trying to convert indices to proper type.

Perform a bitcast on IndicesVec rather than a subvector extract when the original size of IndicesVec is the same as the size of the destination type: a full-width "subvector" extract cannot reduce the element count, which is what led to the crash.

Differential Revision: https://reviews.llvm.org/D101838
1 parent: 62851ea

2 files changed, 129 insertions(+), 3 deletions(-)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
@@ -9957,9 +9957,13 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
   // Adjust IndicesVec to match VT size.
   assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
          "Illegal variable permute mask size");
-  if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
-    IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
-                                  NumElts * VT.getScalarSizeInBits());
+  if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
+    if (IndicesVec.getValueSizeInBits() == SizeInBits)
+      IndicesVec = DAG.getBitcast(IndicesVT, IndicesVec);
+    else
+      IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
+                                    NumElts * VT.getScalarSizeInBits());
+  }
   IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
 
   // Handle SrcVec that don't match VT type.
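
The case the patch adds is an IndicesVec with more (narrower) elements than the destination but the same total bit width, e.g. v4i32 indices feeding a v2i64 permute: the old subvector extract asked for NumElts * ScalarSize bits, which is the whole vector and cannot change the element count, whereas a bitcast to IndicesVT reinterprets the same bits with the right element count. A minimal standalone sketch of that size check, in plain C++ with vector types modeled as element count times element width (an illustration under those assumptions, not the SelectionDAG code):

#include <cassert>
#include <cstdio>

// Toy model of the fixed logic in createVariablePermute. A vector type is
// modeled as (element count, bits per element); the real code works on MVTs.
struct VecTy {
  unsigned NumElts;
  unsigned EltBits;
  unsigned sizeInBits() const { return NumElts * EltBits; }
};

enum class Adjust { None, Bitcast, ExtractSubvector };

// Mirrors the patched condition: if the index vector has more elements than
// the destination, reinterpret it with a bitcast when the total widths match,
// and only otherwise extract the low NumElts * EltBits bits.
Adjust adjustIndices(VecTy Indices, VecTy Dst) {
  assert(Indices.NumElts >= Dst.NumElts && "Illegal variable permute mask size");
  if (Indices.NumElts <= Dst.NumElts)
    return Adjust::None;
  if (Indices.sizeInBits() == Dst.sizeInBits())
    return Adjust::Bitcast;        // same bits, fewer but wider elements
  return Adjust::ExtractSubvector; // strictly wider: take the low subvector
}

int main() {
  // v4i32 indices for a v2i64 permute: 128 bits on both sides. The old code
  // attempted a full-width "subvector" extract here; the fix bitcasts.
  assert(adjustIndices({4, 32}, {2, 64}) == Adjust::Bitcast);
  // v8i32 indices for a v2i64 permute: 256 vs 128 bits, so extract low half.
  assert(adjustIndices({8, 32}, {2, 64}) == Adjust::ExtractSubvector);
  puts("size-adjustment model ok");
  return 0;
}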

llvm/test/CodeGen/X86/var-permute-128.ll

Lines changed: 122 additions & 0 deletions
@@ -1101,3 +1101,125 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
   %ret15 = insertelement <16 x i8> %ret14, i8 %v15, i32 15
   ret <16 x i8> %ret15
 }
+
+define void @indices_convert() {
+; SSE3-LABEL: indices_convert:
+; SSE3:       # %bb.0: # %bb
+; SSE3-NEXT:    movdqa (%rax), %xmm0
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT:    andl $3, %eax
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; SSE3-NEXT:    movd %xmm1, %ecx
+; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT:    andl $3, %ecx
+; SSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT:    movups %xmm1, (%rax)
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: indices_convert:
+; SSSE3:       # %bb.0: # %bb
+; SSSE3-NEXT:    movdqa (%rax), %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSSE3-NEXT:    movd %xmm1, %eax
+; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT:    andl $3, %eax
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; SSSE3-NEXT:    movd %xmm1, %ecx
+; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT:    andl $3, %ecx
+; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT:    movups %xmm1, (%rax)
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: indices_convert:
+; SSE41:       # %bb.0: # %bb
+; SSE41-NEXT:    movaps (%rax), %xmm0
+; SSE41-NEXT:    extractps $2, %xmm0, %eax
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    andl $3, %eax
+; SSE41-NEXT:    extractps $3, %xmm0, %ecx
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    andl $3, %ecx
+; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE41-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE41-NEXT:    movups %xmm1, (%rax)
+; SSE41-NEXT:    retq
+;
+; XOP-LABEL: indices_convert:
+; XOP:       # %bb.0: # %bb
+; XOP-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vmovapd (%rax), %xmm1
+; XOP-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; XOP-NEXT:    vpermil2pd $0, %xmm0, %xmm1, %xmm1, %xmm0
+; XOP-NEXT:    vmovupd %xmm0, (%rax)
+; XOP-NEXT:    retq
+;
+; AVX1-LABEL: indices_convert:
+; AVX1:       # %bb.0: # %bb
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmovapd (%rax), %xmm1
+; AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovupd %xmm0, (%rax)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: indices_convert:
+; AVX2:       # %bb.0: # %bb
+; AVX2-NEXT:    vpbroadcastq (%rax), %xmm0
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vmovapd (%rax), %xmm1
+; AVX2-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vmovupd %xmm0, (%rax)
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: indices_convert:
+; AVX512:       # %bb.0: # %bb
+; AVX512-NEXT:    vmovaps (%rax), %ymm0
+; AVX512-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
+; AVX512-NEXT:    vandps %xmm2, %xmm1, %xmm1
+; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT:    vmovups %xmm0, (%rax)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+;
+; AVX512VL-LABEL: indices_convert:
+; AVX512VL:       # %bb.0: # %bb
+; AVX512VL-NEXT:    vpbroadcastq (%rax), %xmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpermq (%rax), %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+bb:
+  %0 = load <4 x i64>, <4 x i64>* undef, align 32
+  %1 = bitcast <4 x i64> %0 to <8 x i32>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <2 x i32> <i32 2, i32 12>
+  %3 = and <2 x i32> %2, <i32 7, i32 7>
+  %4 = extractelement <2 x i32> %3, i32 0
+  %vecext.i8.1 = extractelement <4 x i64> %0, i32 %4
+  %5 = extractelement <2 x i32> %3, i32 1
+  %vecext.i8.2 = extractelement <4 x i64> %0, i32 %5
+  %6 = insertelement <2 x i64> poison, i64 %vecext.i8.1, i32 0
+  %7 = insertelement <2 x i64> %6, i64 %vecext.i8.2, i32 1
+  %8 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> %7
+  store <2 x i64> %8, <2 x i64>* undef, align 8
+  ret void
+}
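
The RUN lines of var-permute-128.ll sit above this hunk, but assuming the file's usual llvm-lit setup, the new test can also be exercised directly with llc, for instance with a feature set matching the AVX2 prefix above (the exact triple here is an assumption):

  llc -mtriple=x86_64-unknown-unknown -mattr=+avx2 < llvm/test/CodeGen/X86/var-permute-128.ll

Without the X86ISelLowering.cpp change, this reduced input crashed llc while createVariablePermute adjusted the index vector to the destination type.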
