@@ -346,6 +346,86 @@ define <3 x i32> @load_v3i32(ptr %src) {
346
346
ret <3 x i32 > %l
347
347
}
348
348
349
; Test case: loads a <3 x i8> vector from %src with align 1, zero-extends it
; to <3 x i32> and returns the result.
; The "; CHECK" and "; BE" comment lines hold the expected assembly for two
; check prefixes. Both expected sequences assemble the vector from a 2-byte
; load (ldrh) plus a single-byte lane load (ld1 into lane 4), widen it with
; ushll, and then AND the result with a mask materialized by movi.
; NOTE(review): BE presumably denotes a big-endian run - confirm against the
; RUN lines, which are outside this excerpt.
+ define <3 x i32 > @load_v3i8_zext_to_3xi32 (ptr %src ) {
350
+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
351
+ ; CHECK: ; %bb.0:
352
+ ; CHECK-NEXT: sub sp, sp, #16
353
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
354
+ ; CHECK-NEXT: ldrh w8, [x0]
355
+ ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
356
+ ; CHECK-NEXT: strh w8, [sp, #12]
357
+ ; CHECK-NEXT: add x8, x0, #2
358
+ ; CHECK-NEXT: ldr s0, [sp, #12]
359
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
360
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
361
+ ; CHECK-NEXT: ushll.4s v0, v0, #0
362
+ ; CHECK-NEXT: and.16b v0, v0, v1
363
+ ; CHECK-NEXT: add sp, sp, #16
364
+ ; CHECK-NEXT: ret
365
+ ;
366
+ ; BE-LABEL: load_v3i8_zext_to_3xi32:
367
+ ; BE: // %bb.0:
368
+ ; BE-NEXT: sub sp, sp, #16
369
+ ; BE-NEXT: .cfi_def_cfa_offset 16
370
+ ; BE-NEXT: ldrh w8, [x0]
371
+ ; BE-NEXT: movi v1.2d, #0x0000ff000000ff
372
+ ; BE-NEXT: strh w8, [sp, #12]
373
+ ; BE-NEXT: add x8, x0, #2
374
+ ; BE-NEXT: ldr s0, [sp, #12]
375
+ ; BE-NEXT: rev32 v0.8b, v0.8b
376
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
377
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
378
+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
379
+ ; BE-NEXT: and v0.16b, v0.16b, v1.16b
380
+ ; BE-NEXT: rev64 v0.4s, v0.4s
381
+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
382
+ ; BE-NEXT: add sp, sp, #16
383
+ ; BE-NEXT: ret
384
+ %l = load <3 x i8 >, ptr %src , align 1
385
+ %e = zext <3 x i8 > %l to <3 x i32 >
386
+ ret <3 x i32 > %e
387
+ }
388
+
389
; Test case: loads a <3 x i8> vector from %src with align 1, sign-extends it
; to <3 x i32> and returns the result.
; The "; CHECK" and "; BE" comment lines hold the expected assembly for two
; check prefixes. Both expected sequences assemble the vector from a 2-byte
; load (ldrh) plus a single-byte lane load (ld1 into lane 4), widen it with
; ushll, and then apply a left shift by 24 followed by a signed right shift
; by 24 (shl + sshr) on the 32-bit lanes.
; NOTE(review): BE presumably denotes a big-endian run - confirm against the
; RUN lines, which are outside this excerpt.
+ define <3 x i32 > @load_v3i8_sext_to_3xi32 (ptr %src ) {
390
+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
391
+ ; CHECK: ; %bb.0:
392
+ ; CHECK-NEXT: sub sp, sp, #16
393
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
394
+ ; CHECK-NEXT: ldrh w8, [x0]
395
+ ; CHECK-NEXT: strh w8, [sp, #12]
396
+ ; CHECK-NEXT: add x8, x0, #2
397
+ ; CHECK-NEXT: ldr s0, [sp, #12]
398
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
399
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
400
+ ; CHECK-NEXT: ushll.4s v0, v0, #0
401
+ ; CHECK-NEXT: shl.4s v0, v0, #24
402
+ ; CHECK-NEXT: sshr.4s v0, v0, #24
403
+ ; CHECK-NEXT: add sp, sp, #16
404
+ ; CHECK-NEXT: ret
405
+ ;
406
+ ; BE-LABEL: load_v3i8_sext_to_3xi32:
407
+ ; BE: // %bb.0:
408
+ ; BE-NEXT: sub sp, sp, #16
409
+ ; BE-NEXT: .cfi_def_cfa_offset 16
410
+ ; BE-NEXT: ldrh w8, [x0]
411
+ ; BE-NEXT: strh w8, [sp, #12]
412
+ ; BE-NEXT: add x8, x0, #2
413
+ ; BE-NEXT: ldr s0, [sp, #12]
414
+ ; BE-NEXT: rev32 v0.8b, v0.8b
415
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
416
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
417
+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
418
+ ; BE-NEXT: shl v0.4s, v0.4s, #24
419
+ ; BE-NEXT: sshr v0.4s, v0.4s, #24
420
+ ; BE-NEXT: rev64 v0.4s, v0.4s
421
+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
422
+ ; BE-NEXT: add sp, sp, #16
423
+ ; BE-NEXT: ret
424
+ %l = load <3 x i8 >, ptr %src , align 1
425
+ %e = sext <3 x i8 > %l to <3 x i32 >
426
+ ret <3 x i32 > %e
427
+ }
428
+
349
429
define void @store_trunc_from_64bits (ptr %src , ptr %dst ) {
350
430
; CHECK-LABEL: store_trunc_from_64bits:
351
431
; CHECK: ; %bb.0: ; %entry
@@ -389,11 +469,11 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
389
469
; CHECK-NEXT: add x8, x0, #4
390
470
; CHECK-NEXT: ldr s0, [x0]
391
471
; CHECK-NEXT: Lloh0:
392
- ; CHECK-NEXT: adrp x9, lCPI9_0 @PAGE
472
+ ; CHECK-NEXT: adrp x9, lCPI11_0 @PAGE
393
473
; CHECK-NEXT: ld1.h { v0 }[2], [x8]
394
474
; CHECK-NEXT: add x8, x1, #1
395
475
; CHECK-NEXT: Lloh1:
396
- ; CHECK-NEXT: ldr d1, [x9, lCPI9_0 @PAGEOFF]
476
+ ; CHECK-NEXT: ldr d1, [x9, lCPI11_0 @PAGEOFF]
397
477
; CHECK-NEXT: add x9, x1, #2
398
478
; CHECK-NEXT: add.4h v0, v0, v1
399
479
; CHECK-NEXT: st1.b { v0 }[2], [x8]
@@ -408,8 +488,8 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
408
488
; BE-NEXT: .cfi_def_cfa_offset 16
409
489
; BE-NEXT: ldr s0, [x0]
410
490
; BE-NEXT: add x8, x0, #4
411
- ; BE-NEXT: adrp x9, .LCPI9_0
412
- ; BE-NEXT: add x9, x9, :lo12:.LCPI9_0
491
+ ; BE-NEXT: adrp x9, .LCPI11_0
492
+ ; BE-NEXT: add x9, x9, :lo12:.LCPI11_0
413
493
; BE-NEXT: rev32 v0.4h, v0.4h
414
494
; BE-NEXT: ld1 { v1.4h }, [x9]
415
495
; BE-NEXT: ld1 { v0.h }[2], [x8]
@@ -541,12 +621,12 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
541
621
; CHECK-NEXT: .cfi_def_cfa_offset 16
542
622
; CHECK-NEXT: ldrh w8, [x0]
543
623
; CHECK-NEXT: Lloh2:
544
- ; CHECK-NEXT: adrp x9, lCPI13_0 @PAGE
624
+ ; CHECK-NEXT: adrp x9, lCPI15_0 @PAGE
545
625
; CHECK-NEXT: strh w8, [sp, #12]
546
626
; CHECK-NEXT: add x8, x0, #2
547
627
; CHECK-NEXT: ldr s0, [sp, #12]
548
628
; CHECK-NEXT: Lloh3:
549
- ; CHECK-NEXT: ldr d1, [x9, lCPI13_0 @PAGEOFF]
629
+ ; CHECK-NEXT: ldr d1, [x9, lCPI15_0 @PAGEOFF]
550
630
; CHECK-NEXT: ushll.8h v0, v0, #0
551
631
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
552
632
; CHECK-NEXT: add x8, x1, #4
@@ -569,8 +649,8 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
569
649
; BE-NEXT: rev32 v0.8b, v0.8b
570
650
; BE-NEXT: ushll v0.8h, v0.8b, #0
571
651
; BE-NEXT: ld1 { v0.b }[4], [x8]
572
- ; BE-NEXT: adrp x8, .LCPI13_0
573
- ; BE-NEXT: add x8, x8, :lo12:.LCPI13_0
652
+ ; BE-NEXT: adrp x8, .LCPI15_0
653
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI15_0
574
654
; BE-NEXT: ld1 { v1.4h }, [x8]
575
655
; BE-NEXT: bic v0.4h, #255, lsl #8
576
656
; BE-NEXT: add x8, x1, #4
@@ -799,3 +879,115 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
799
879
store volatile <3 x i8 > %t , ptr %dst , align 1
800
880
ret void
801
881
}
882
+
883
; Test case: loads a <3 x i8> vector from %src with align 1, zero-extends it
; to <3 x i32>, adds the constant vector <1, 2, 3>, truncates the sum back to
; <3 x i8> and stores it to the same pointer %src.
; The "; CHECK" and "; BE" comment lines hold the expected assembly for two
; check prefixes. The expected CHECK sequence loads a constant from lCPI22_0,
; combines it with the loaded bytes via uaddw, and writes the result back with
; three single-byte lane stores (st1.b); the expected BE sequence adds on
; 16-bit lanes and writes back with one strb and one strh.
; NOTE(review): the store below carries no explicit alignment while the load
; uses align 1 - confirm this difference is intentional.
; NOTE(review): BE presumably denotes a big-endian run - confirm against the
; RUN lines, which are outside this excerpt.
+ define void @load_v3i8_zext_to_3xi32_add_trunc_store (ptr %src ) {
884
+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
885
+ ; CHECK: ; %bb.0:
886
+ ; CHECK-NEXT: sub sp, sp, #16
887
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
888
+ ; CHECK-NEXT: ldrh w9, [x0]
889
+ ; CHECK-NEXT: Lloh4:
890
+ ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
891
+ ; CHECK-NEXT: strh w9, [sp, #12]
892
+ ; CHECK-NEXT: add x9, x0, #2
893
+ ; CHECK-NEXT: ldr s0, [sp, #12]
894
+ ; CHECK-NEXT: Lloh5:
895
+ ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
896
+ ; CHECK-NEXT: add x8, x0, #1
897
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
898
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
899
+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
900
+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
901
+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
902
+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
903
+ ; CHECK-NEXT: add sp, sp, #16
904
+ ; CHECK-NEXT: ret
905
+ ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
906
+ ;
907
+ ; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
908
+ ; BE: // %bb.0:
909
+ ; BE-NEXT: sub sp, sp, #16
910
+ ; BE-NEXT: .cfi_def_cfa_offset 16
911
+ ; BE-NEXT: ldrh w8, [x0]
912
+ ; BE-NEXT: adrp x9, .LCPI22_0
913
+ ; BE-NEXT: add x9, x9, :lo12:.LCPI22_0
914
+ ; BE-NEXT: strh w8, [sp, #12]
915
+ ; BE-NEXT: add x8, x0, #2
916
+ ; BE-NEXT: ldr s0, [sp, #12]
917
+ ; BE-NEXT: ld1 { v1.4h }, [x9]
918
+ ; BE-NEXT: rev32 v0.8b, v0.8b
919
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
920
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
921
+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
922
+ ; BE-NEXT: xtn v1.8b, v0.8h
923
+ ; BE-NEXT: umov w8, v0.h[2]
924
+ ; BE-NEXT: rev32 v1.16b, v1.16b
925
+ ; BE-NEXT: str s1, [sp, #8]
926
+ ; BE-NEXT: ldrh w9, [sp, #8]
927
+ ; BE-NEXT: strb w8, [x0, #2]
928
+ ; BE-NEXT: strh w9, [x0]
929
+ ; BE-NEXT: add sp, sp, #16
930
+ ; BE-NEXT: ret
931
+ %l = load <3 x i8 >, ptr %src , align 1
932
+ %e = zext <3 x i8 > %l to <3 x i32 >
933
+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
934
+ %t = trunc <3 x i32 > %add to <3 x i8 >
935
+ store <3 x i8 > %t , ptr %src
936
+ ret void
937
+ }
938
+
939
; Test case: loads a <3 x i8> vector from %src with align 1, sign-extends it
; to <3 x i32>, adds the constant vector <1, 2, 3>, truncates the sum back to
; <3 x i8> and stores it to the same pointer %src.
; The "; CHECK" and "; BE" comment lines hold the expected assembly for two
; check prefixes. Apart from label numbers (lCPI23_0 / .LCPI23_0, Lloh6 /
; Lloh7) the expected sequences are the same as those recorded for
; load_v3i8_zext_to_3xi32_add_trunc_store; in particular the CHECK sequence
; still uses uaddw even though the IR extension here is sext.
; NOTE(review): the store below carries no explicit alignment while the load
; uses align 1 - confirm this difference is intentional.
; NOTE(review): BE presumably denotes a big-endian run - confirm against the
; RUN lines, which are outside this excerpt.
+ define void @load_v3i8_sext_to_3xi32_add_trunc_store (ptr %src ) {
940
+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
941
+ ; CHECK: ; %bb.0:
942
+ ; CHECK-NEXT: sub sp, sp, #16
943
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
944
+ ; CHECK-NEXT: ldrh w9, [x0]
945
+ ; CHECK-NEXT: Lloh6:
946
+ ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
947
+ ; CHECK-NEXT: strh w9, [sp, #12]
948
+ ; CHECK-NEXT: add x9, x0, #2
949
+ ; CHECK-NEXT: ldr s0, [sp, #12]
950
+ ; CHECK-NEXT: Lloh7:
951
+ ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
952
+ ; CHECK-NEXT: add x8, x0, #1
953
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
954
+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
955
+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
956
+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
957
+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
958
+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
959
+ ; CHECK-NEXT: add sp, sp, #16
960
+ ; CHECK-NEXT: ret
961
+ ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
962
+ ;
963
+ ; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
964
+ ; BE: // %bb.0:
965
+ ; BE-NEXT: sub sp, sp, #16
966
+ ; BE-NEXT: .cfi_def_cfa_offset 16
967
+ ; BE-NEXT: ldrh w8, [x0]
968
+ ; BE-NEXT: adrp x9, .LCPI23_0
969
+ ; BE-NEXT: add x9, x9, :lo12:.LCPI23_0
970
+ ; BE-NEXT: strh w8, [sp, #12]
971
+ ; BE-NEXT: add x8, x0, #2
972
+ ; BE-NEXT: ldr s0, [sp, #12]
973
+ ; BE-NEXT: ld1 { v1.4h }, [x9]
974
+ ; BE-NEXT: rev32 v0.8b, v0.8b
975
+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
976
+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
977
+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
978
+ ; BE-NEXT: xtn v1.8b, v0.8h
979
+ ; BE-NEXT: umov w8, v0.h[2]
980
+ ; BE-NEXT: rev32 v1.16b, v1.16b
981
+ ; BE-NEXT: str s1, [sp, #8]
982
+ ; BE-NEXT: ldrh w9, [sp, #8]
983
+ ; BE-NEXT: strb w8, [x0, #2]
984
+ ; BE-NEXT: strh w9, [x0]
985
+ ; BE-NEXT: add sp, sp, #16
986
+ ; BE-NEXT: ret
987
+ %l = load <3 x i8 >, ptr %src , align 1
988
+ %e = sext <3 x i8 > %l to <3 x i32 >
989
+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
990
+ %t = trunc <3 x i32 > %add to <3 x i8 >
991
+ store <3 x i8 > %t , ptr %src
992
+ ret void
993
+ }
0 commit comments