@@ -322,5 +322,132 @@ define void @g(i32 %a) nounwind {
322
322
ret void
323
323
}
324
324
325
; Check that (zext i16 (shl (zext i8 (and X, 64)), 9)) folds to a single
; and+shl in the wider register: bit 6 shifted left 9 lands at bit 15,
; which still fits in the i16 intermediate, so no masking is lost.
define i32 @shift_zext_shl(i8 zeroext %x) {
; X86-LABEL: shift_zext_shl:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $64, %eax
; X86-NEXT:    shll $9, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_zext_shl:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $64, %eax
; X64-NEXT:    shll $9, %eax
; X64-NEXT:    retq
  %a = and i8 %x, 64
  %b = zext i8 %a to i16
  %c = shl i16 %b, 9
  %d = zext i16 %c to i32
  ret i32 %d
}
; Same pattern as @shift_zext_shl but with the zext directly to i32 before
; the shift; both forms should produce identical and+shl codegen.
define i32 @shift_zext_shl2(i8 zeroext %x) {
; X86-LABEL: shift_zext_shl2:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $64, %eax
; X86-NEXT:    shll $9, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_zext_shl2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $64, %eax
; X64-NEXT:    shll $9, %eax
; X64-NEXT:    retq
  %a = and i8 %x, 64
  %b = zext i8 %a to i32
  %c = shl i32 %b, 9
  ret i32 %c
}
; Vector form of @shift_zext_shl with per-lane masks and shift amounts.
; X86 (no SSE assumed here) scalarizes; X64 lowers the non-uniform i16
; shift to a pmullw between the two zero-extension unpacks.
define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind {
; X86-LABEL: shift_zext_shl_vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $64, %ecx
; X86-NEXT:    shll $9, %ecx
; X86-NEXT:    andl $63, %edx
; X86-NEXT:    shll $8, %edx
; X86-NEXT:    andl $31, %esi
; X86-NEXT:    shll $7, %esi
; X86-NEXT:    andl $23, %edi
; X86-NEXT:    shll $6, %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: shift_zext_shl_vec:
; X64:       # %bb.0:
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
  %b = zext <4 x i8> %a to <4 x i16>
  %c = shl <4 x i16> %b, <i16 9, i16 8, i16 7, i16 6>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}
; Vector form with the zext taken directly to i32 before the shift.
; X64 must do the variable i32 shift as pmuludq pairs (no pmulld assumed),
; unlike @shift_zext_shl_vec where the narrower i16 shift used one pmullw.
define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind {
; X86-LABEL: shift_zext_shl2_vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    andl $23, %edi
; X86-NEXT:    andl $31, %esi
; X86-NEXT:    andl $63, %edx
; X86-NEXT:    andl $64, %ecx
; X86-NEXT:    shll $9, %ecx
; X86-NEXT:    shll $8, %edx
; X86-NEXT:    shll $7, %esi
; X86-NEXT:    shll $6, %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: shift_zext_shl2_vec:
; X64:       # %bb.0:
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
  %b = zext <4 x i8> %a to <4 x i32>
  %c = shl <4 x i32> %b, <i32 9, i32 8, i32 7, i32 6>
  ret <4 x i32> %c
}
; External callee used by other tests in this file; body not needed.
declare dso_local void @f(i64)