Commit c9737b6

[X86] Add regression test case from rG057db2002bb3
When constant folding "ANDNP(C,X) -> AND(~C,X)" we hit cases such as this one, where the fold interfered with the "OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))" fold in canonicalizeBitSelect.
1 parent: af1b7eb
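
For reference, below is a minimal IR sketch of the bit-select idiom that canonicalizeBitSelect matches. It is illustrative only; the function name @bitselect_sketch and the mask constants are not from this commit.

; A minimal sketch, not part of this commit: with a constant mask C,
; (X & C) | (Y & ~C) selects bits of X where C is set and bits of Y
; where C is clear. canonicalizeBitSelect rewrites the ~C operand into
; ANDNP (or VPCMOV on XOP), which the ANDNP(C,X) -> AND(~C,X) constant
; fold could undo.
define <2 x i64> @bitselect_sketch(<2 x i64> %x, <2 x i64> %y) {
  %xm = and <2 x i64> %x, <i64 255, i64 255>   ; X & C
  %ym = and <2 x i64> %y, <i64 -256, i64 -256> ; Y & ~C
  %r = or <2 x i64> %xm, %ym                   ; bitselect(C, X, Y)
  ret <2 x i64> %r
}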

1 file changed: 130 additions, 0 deletions
llvm/test/CodeGen/X86/combine-bitselect.ll

@@ -1061,3 +1061,133 @@ bb:
   ret <4 x i1> %tmp4
 }
 
+; Regression reported on 057db2002bb3d79429db3c5fe436c8cefc50cb25
+@d = external global <2 x i64>, align 16
+define void @constantfold_andn_mask() nounwind {
+; SSE-LABEL: constantfold_andn_mask:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    callq use@PLT
+; SSE-NEXT:    movdqu (%rax), %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pavgb %xmm2, %xmm0
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    por %xmm1, %xmm2
+; SSE-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; SSE-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; SSE-NEXT:    movdqa %xmm2, (%rax)
+; SSE-NEXT:    popq %rax
+; SSE-NEXT:    retq
+;
+; XOP-LABEL: constantfold_andn_mask:
+; XOP:       # %bb.0: # %entry
+; XOP-NEXT:    pushq %rax
+; XOP-NEXT:    callq use@PLT
+; XOP-NEXT:    vmovdqu (%rax), %xmm1
+; XOP-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; XOP-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; XOP-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
+; XOP-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; XOP-NEXT:    vpandn %xmm0, %xmm2, %xmm0
+; XOP-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; XOP-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; XOP-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; XOP-NEXT:    vmovdqa %xmm0, (%rax)
+; XOP-NEXT:    popq %rax
+; XOP-NEXT:    retq
+;
+; AVX1-LABEL: constantfold_andn_mask:
+; AVX1:       # %bb.0: # %entry
+; AVX1-NEXT:    pushq %rax
+; AVX1-NEXT:    callq use@PLT
+; AVX1-NEXT:    vmovdqu (%rax), %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpandn %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; AVX1-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; AVX1-NEXT:    vmovdqa %xmm0, (%rax)
+; AVX1-NEXT:    popq %rax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constantfold_andn_mask:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    pushq %rax
+; AVX2-NEXT:    callq use@PLT
+; AVX2-NEXT:    vmovdqu (%rax), %xmm1
+; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
+; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; AVX2-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; AVX2-NEXT:    vmovdqa %xmm0, (%rax)
+; AVX2-NEXT:    popq %rax
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: constantfold_andn_mask:
+; AVX512F:       # %bb.0: # %entry
+; AVX512F-NEXT:    pushq %rax
+; AVX512F-NEXT:    callq use@PLT
+; AVX512F-NEXT:    vmovdqu (%rax), %xmm1
+; AVX512F-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX512F-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; AVX512F-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; AVX512F-NEXT:    vmovdqa %xmm0, (%rax)
+; AVX512F-NEXT:    popq %rax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: constantfold_andn_mask:
+; AVX512VL:       # %bb.0: # %entry
+; AVX512VL-NEXT:    pushq %rax
+; AVX512VL-NEXT:    callq use@PLT
+; AVX512VL-NEXT:    vmovdqu (%rax), %xmm1
+; AVX512VL-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX512VL-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
+; AVX512VL-NEXT:    xorq d@GOTPCREL(%rip), %rax
+; AVX512VL-NEXT:    vmovdqa %xmm0, (%rax)
+; AVX512VL-NEXT:    popq %rax
+; AVX512VL-NEXT:    retq
+entry:
+  %call = call noundef <2 x i64> @use()
+  %_msret = load <2 x i64>, ptr undef, align 8
+  %i = bitcast <2 x i64> %_msret to <16 x i8>
+  %i1 = bitcast <2 x i64> %call to <16 x i8>
+  %i2 = and <16 x i8> %i, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
+  %i3 = and <16 x i8> %i1, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
+  %i4 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>, <16 x i8> %i3)
+  %i5 = bitcast <16 x i8> %i2 to <2 x i64>
+  %i6 = bitcast <16 x i8> %i4 to <2 x i64>
+  %i7 = and <2 x i64> %_msret, <i64 567462211834873824, i64 567462211834873824>
+  %i8 = xor <2 x i64> zeroinitializer, <i64 -1, i64 -1>
+  %i9 = xor <2 x i64> %i6, <i64 -1, i64 -1>
+  %i10 = and <2 x i64> %i8, %i5
+  %i11 = and <2 x i64> %i7, %i9
+  %i12 = or <2 x i64> zeroinitializer, %i10
+  %i13 = or <2 x i64> %i12, %i11
+  store <2 x i64> %i13, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @d to i64), i64 87960930222080) to ptr), align 16
+  ret void
+}
+
+declare <2 x i64> @use()
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
