@@ -975,18 +975,15 @@ define void @interleave_24i16_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0],xmm1[1],xmm4[2,3],xmm1[4],xmm4[5,6],xmm1[7]
; SSE42-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,1,6,7,12,13,2,3,8,9,14,15,u,u,u,u]
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5],xmm3[6,7]
- ; SSE42-NEXT: movdqa %xmm2, %xmm3
- ; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u,u,u,u,u,0,1,6,7,12,13]
- ; SSE42-NEXT: movdqa %xmm0, %xmm5
- ; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2],xmm5[3,4],xmm1[5],xmm5[6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm5 = xmm5[2,3,8,9,14,15,4,5,10,11,u,u,u,u,u,u]
- ; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4],xmm3[5,6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,u,u,2,3,8,9,14,15]
+ ; SSE42-NEXT: movdqa %xmm0, %xmm3
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm1[2],xmm3[3,4],xmm1[5],xmm3[6,7]
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0],xmm3[1,2],xmm2[3],xmm3[4,5],xmm2[6],xmm3[7]
+ ; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4,5,10,11,0,1,6,7,12,13,u,u,u,u,u,u]
- ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4],xmm2[5,6,7]
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3],xmm2[4],xmm1[5,6],xmm2[7]
+ ; SSE42-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15]
; SSE42-NEXT: movdqu %xmm4, (%rsi)
- ; SSE42-NEXT: movdqu %xmm5, (%rdx)
+ ; SSE42-NEXT: movdqu %xmm3, (%rdx)
; SSE42-NEXT: movdqu %xmm1, (%rcx)
; SSE42-NEXT: retq
;
@@ -1000,14 +997,12 @@ define void @interleave_24i16_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,1,6,7,12,13,2,3,8,9,14,15,u,u,u,u]
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3,4,5],xmm3[6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm2[u,u,u,u,u,u,u,u,u,u,0,1,6,7,12,13]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[2,3,8,9,14,15,4,5,10,11,u,u,u,u,u,u]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3,4],xmm4[5,6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,u,u,2,3,8,9,14,15]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm4[1,2],xmm2[3],xmm4[4,5],xmm2[6],xmm4[7]
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,u,u,u,u,u,u]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5,6,7]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3],xmm2[4],xmm0[5,6],xmm2[7]
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15]
; AVX1-NEXT: vmovdqu %xmm3, (%rsi)
; AVX1-NEXT: vmovdqu %xmm4, (%rdx)
; AVX1-NEXT: vmovdqu %xmm0, (%rcx)
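
For readers of this diff, here is a minimal C intrinsics sketch (not part of the test file; the helper name and argument layout are assumptions) of what the updated SSE4.2/AVX1 sequence above computes for the second de-interleaved output: two pblendw merges gather elements 1,4,7,10,13,16,19,22 of the 24 interleaved i16 values, and a single pshufb puts them in order.

#include <smmintrin.h>  // SSE4.1 _mm_blend_epi16; also provides SSSE3 _mm_shuffle_epi8

// a = p[0..7], b = p[8..15], c = p[16..23] of the interleaved source.
// Returns {p1,p4,p7,p10,p13,p16,p19,p22}, the second element of each triple.
static __m128i deinterleave_second(__m128i a, __m128i b, __m128i c) {
  __m128i t = _mm_blend_epi16(a, b, 0x24);  // take words 2,5 from b (pblendw)
  t = _mm_blend_epi16(t, c, 0x49);          // take words 0,3,6 from c (pblendw)
  const __m128i idx = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5,
                                    10, 11, 0, 1, 6, 7, 12, 13);
  return _mm_shuffle_epi8(t, idx);          // one pshufb emits the final order
}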
@@ -1125,18 +1120,15 @@ define void @interleave_24i16_out_reverse(ptr %p, ptr %q1, ptr %q2, ptr %q3) nou
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0],xmm2[1],xmm4[2,3],xmm2[4],xmm4[5,6],xmm2[7]
; SSE42-NEXT: pshufb {{.*#+}} xmm4 = xmm4[14,15,8,9,2,3,12,13,6,7,0,1,u,u,u,u]
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5],xmm3[6,7]
- ; SSE42-NEXT: movdqa %xmm0, %xmm3
- ; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u,u,u,u,u,14,15,8,9,2,3]
- ; SSE42-NEXT: movdqa %xmm2, %xmm5
- ; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2],xmm5[3,4],xmm1[5],xmm5[6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm5 = xmm5[12,13,6,7,0,1,10,11,4,5,u,u,u,u,u,u]
- ; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4],xmm3[5,6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,12,13,6,7,0,1]
+ ; SSE42-NEXT: movdqa %xmm2, %xmm3
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm1[2],xmm3[3,4],xmm1[5],xmm3[6,7]
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm0[1],xmm3[2,3],xmm0[4],xmm3[5,6],xmm0[7]
+ ; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[12,13,6,7,0,1,10,11,4,5,14,15,8,9,2,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3,4],xmm2[5],xmm1[6,7]
- ; SSE42-NEXT: pshufb {{.*#+}} xmm1 = xmm1[10,11,4,5,14,15,8,9,2,3,u,u,u,u,u,u]
- ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4],xmm0[5,6,7]
+ ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
+ ; SSE42-NEXT: pshufb {{.*#+}} xmm1 = xmm1[10,11,4,5,14,15,8,9,2,3,12,13,6,7,0,1]
; SSE42-NEXT: movdqu %xmm4, (%rsi)
- ; SSE42-NEXT: movdqu %xmm5, (%rdx)
+ ; SSE42-NEXT: movdqu %xmm3, (%rdx)
; SSE42-NEXT: movdqu %xmm1, (%rcx)
; SSE42-NEXT: retq
;
@@ -1145,14 +1137,12 @@ define void @interleave_24i16_out_reverse(ptr %p, ptr %q1, ptr %q2, ptr %q3) nou
; AVX1-NEXT: vmovdqu (%rdi), %xmm0
; AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqu 32(%rdi), %xmm2
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[14,15,8,9,2,3,u,u,u,u,u,u,u,u,u,u]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[u,u,u,u,u,u,12,13,6,7,0,1,10,11,4,5]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[3,4,5,6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm2[12,13,6,7,0,1,u,u,u,u,u,u,u,u,u,u]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
- ; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[u,u,u,u,u,u,10,11,4,5,14,15,8,9,2,3]
- ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2],xmm5[3,4,5,6,7]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4],xmm3[5,6],xmm2[7]
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[14,15,8,9,2,3,12,13,6,7,0,1,10,11,4,5]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
+ ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm4[1,2],xmm2[3],xmm4[4,5],xmm2[6],xmm4[7]
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[12,13,6,7,0,1,10,11,4,5,14,15,8,9,2,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,2,2,3,4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]