@@ -354,6 +354,150 @@ define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %sham
354
354
ret <2 x i64 > %r
355
355
}
356
356
357
+ ; Convert an 'or' concat to fshl if the opposite-order 'or' concat of the same operands exists.
358
+
359
+ define i32 @fshl_concat_i8_i24 (i8 %x , i24 %y , ptr %addr ) { ; Positive test: x (8 bits) and y (24 bits) are concatenated in both orders and together fill the i32.
360
+ ; CHECK-LABEL: @fshl_concat_i8_i24(
361
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
362
+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw i32 [[ZEXT_X]], 24
363
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i24 [[Y:%.*]] to i32
364
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
365
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
366
+ ; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 8)
367
+ ; CHECK-NEXT: ret i32 [[YX]]
368
+ ;
369
+ %zext.x = zext i8 %x to i32
370
+ %slx = shl i32 %zext.x , 24 ; x now occupies bits 24..31
371
+ %zext.y = zext i24 %y to i32
372
+ %xy = or i32 %zext.y , %slx ; xy = x:y (x in the high 8 bits, y in the low 24)
373
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy; the expected output reuses it as the rotate input
374
+ %sly = shl i32 %zext.y , 8 ; y now occupies bits 8..31
375
+ %yx = or i32 %zext.x , %sly ; yx = y:x, which equals %xy rotated left by 8
376
+ ret i32 %yx
377
+ }
378
+
379
+ define i32 @fshl_concat_i8_i8 (i8 %x , i8 %y , ptr %addr ) { ; Positive test: both sources are 8 bits wide and the shift amounts 13 and 19 sum to the bit width 32.
380
+ ; CHECK-LABEL: @fshl_concat_i8_i8(
381
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
382
+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 13
383
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
384
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
385
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
386
+ ; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 19)
387
+ ; CHECK-NEXT: ret i32 [[YX]]
388
+ ;
389
+ %zext.x = zext i8 %x to i32
390
+ %slx = shl i32 %zext.x , 13 ; x now occupies bits 13..20
391
+ %zext.y = zext i8 %y to i32
392
+ %xy = or i32 %zext.y , %slx ; y in bits 0..7, x in bits 13..20, no overlap
393
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy; the expected output reuses it as the rotate input
394
+ %sly = shl i32 %zext.y , 19 ; y now occupies bits 19..26
395
+ %yx = or i32 %zext.x , %sly ; equals %xy rotated left by 19 (x wraps from bits 13..20 to bits 0..7)
396
+ ret i32 %yx
397
+ }
398
+
399
+ define i32 @fshl_concat_i8_i8_overlap (i8 %x , i8 %y , ptr %addr ) {
400
+ ; CHECK-LABEL: @fshl_concat_i8_i8_overlap(
401
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
402
+ ; CHECK-NEXT: [[SLX:%.*]] = shl i32 [[ZEXT_X]], 25
403
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
404
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
405
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
406
+ ; CHECK-NEXT: [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 7
407
+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
408
+ ; CHECK-NEXT: ret i32 [[YX]]
409
+ ;
410
+ ; Negative test: %sly (y in bits 7..14) overlaps %zext.x (bits 0..7) at bit 7, so the expected output keeps both shl/or pairs.
411
+ %zext.x = zext i8 %x to i32
412
+ %slx = shl i32 %zext.x , 25 ; x's top bit is shifted out of the i32 here
413
+ %zext.y = zext i8 %y to i32
414
+ %xy = or i32 %zext.y , %slx
415
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy
416
+ %sly = shl i32 %zext.y , 7 ; y now occupies bits 7..14
417
+ %yx = or i32 %zext.x , %sly ; bit 7 receives contributions from both x and y
418
+ ret i32 %yx
419
+ }
420
+
421
+ define i32 @fshl_concat_i8_i8_drop (i8 %x , i8 %y , ptr %addr ) {
422
+ ; CHECK-LABEL: @fshl_concat_i8_i8_drop(
423
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
424
+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 7
425
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
426
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
427
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
428
+ ; CHECK-NEXT: [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 25
429
+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
430
+ ; CHECK-NEXT: ret i32 [[YX]]
431
+ ;
432
+ ; Negative test: %sly shifts the 8-bit y left by 25, dropping y's top bit out of the i32, so the expected output keeps both shl/or pairs.
433
+ %zext.x = zext i8 %x to i32
434
+ %slx = shl i32 %zext.x , 7 ; x now occupies bits 7..14
435
+ %zext.y = zext i8 %y to i32
436
+ %xy = or i32 %zext.y , %slx
437
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy
438
+ %sly = shl i32 %zext.y , 25 ; only y's low 7 bits survive, in bits 25..31
439
+ %yx = or i32 %zext.x , %sly
440
+ ret i32 %yx
441
+ }
442
+
443
+ define i32 @fshl_concat_i8_i8_different_slot (i8 %x , i8 %y , ptr %addr ) { ; Negative test: the shift amounts 9 and 22 sum to 31, not the bit width 32.
444
+ ; CHECK-LABEL: @fshl_concat_i8_i8_different_slot(
445
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
446
+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 9
447
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
448
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
449
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
450
+ ; CHECK-NEXT: [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 22
451
+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
452
+ ; CHECK-NEXT: ret i32 [[YX]]
453
+ ;
454
+ %zext.x = zext i8 %x to i32
455
+ %slx = shl i32 %zext.x , 9 ; x now occupies bits 9..16
456
+ %zext.y = zext i8 %y to i32
457
+ %xy = or i32 %zext.y , %slx
458
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy
459
+ %sly = shl i32 %zext.y , 22 ; y now occupies bits 22..29
460
+ %yx = or i32 %zext.x , %sly ; not a rotate of %xy, so the expected output keeps both shl/or pairs
461
+ ret i32 %yx
462
+ }
463
+
464
+ define i32 @fshl_concat_unknown_source (i32 %zext.x , i32 %zext.y , ptr %addr ) { ; Negative test: operands are plain i32 arguments (no zext), so their high bits are not known to be zero.
464
+ ; CHECK-LABEL: @fshl_concat_unknown_source(
465
+ ; CHECK-NEXT: [[SLX:%.*]] = shl i32 [[ZEXT_X:%.*]], 16
466
+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y:%.*]]
467
+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
468
+ ; CHECK-NEXT: [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 16
469
+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
470
+ ; CHECK-NEXT: ret i32 [[YX]]
471
+ ;
472
+ %slx = shl i32 %zext.x , 16
473
+ %xy = or i32 %zext.y , %slx ; the upper 16 bits of %zext.y may overlap the shifted x
474
+ store i32 %xy , ptr %addr , align 4 ; second use of %xy
475
+ %sly = shl i32 %zext.y , 16
476
+ %yx = or i32 %zext.x , %sly ; the expected output keeps both shl/or pairs
477
+ ret i32 %yx
478
+ }
480
+
481
+ define <2 x i32 > @fshl_concat_vector (<2 x i8 > %x , <2 x i24 > %y , ptr %addr ) { ; Positive test: vector form of the i8/i24 concat, with splat shift amounts 24 and 8.
482
+ ; CHECK-LABEL: @fshl_concat_vector(
483
+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
484
+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
485
+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
486
+ ; CHECK-NEXT: [[XY:%.*]] = or <2 x i32> [[SLX]], [[ZEXT_Y]]
487
+ ; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
488
+ ; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
489
+ ; CHECK-NEXT: ret <2 x i32> [[YX]]
490
+ ;
491
+ %zext.x = zext <2 x i8 > %x to <2 x i32 >
492
+ %slx = shl <2 x i32 > %zext.x , <i32 24 , i32 24 > ; each x lane now occupies bits 24..31
493
+ %zext.y = zext <2 x i24 > %y to <2 x i32 >
494
+ %xy = or <2 x i32 > %slx , %zext.y ; per lane: x:y
495
+ store <2 x i32 > %xy , ptr %addr , align 4 ; second use of %xy; the expected output reuses it as the rotate input
496
+ %sly = shl <2 x i32 > %zext.y , <i32 8 , i32 8 > ; each y lane now occupies bits 8..31
497
+ %yx = or <2 x i32 > %sly , %zext.x ; per lane: y:x, which equals %xy rotated left by 8
498
+ ret <2 x i32 > %yx
499
+ }
500
+
357
501
; Negative test - an oversized shift in the narrow type would produce the wrong value.
358
502
359
503
define i8 @unmasked_shlop_unmasked_shift_amount (i32 %x , i32 %y , i32 %shamt ) {
0 commit comments