3
3
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
4
4
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
5
5
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
6
+ // RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
7
+ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT
6
8
7
9
// CHECK-LE-LABEL: @test1(
8
10
// CHECK-LE-NEXT: entry:
16
18
// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
17
19
// CHECK-BE-NEXT: ret void
18
20
//
21
+ // CHECK-LE-NOOPT-LABEL: @test1(
22
+ // CHECK-LE-NOOPT-NEXT: entry:
23
+ // CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
24
+ // CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
25
+ // CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
26
+ // CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
27
+ // CHECK-LE-NOOPT-NEXT: [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16
28
+ // CHECK-LE-NOOPT-NEXT: [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16
29
+ // CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
30
+ // CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
31
+ // CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
32
+ // CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64
33
+ // CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
34
+ // CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
35
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
36
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
37
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16
38
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16
39
+ // CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
40
+ // CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
41
+ // CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
42
+ // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
43
+ // CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
44
+ // CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
45
+ // CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
46
+ // CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
47
+ // CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
48
+ // CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16
49
+ // CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16
50
+ // CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
51
+ // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP8]], ptr [[RES]], align 64
52
+ // CHECK-LE-NOOPT-NEXT: [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64
53
+ // CHECK-LE-NOOPT-NEXT: [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
54
+ // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64
55
+ // CHECK-LE-NOOPT-NEXT: ret void
56
+ //
19
57
void test1 (unsigned char * vqp , unsigned char * vpp , vector unsigned char vc1 , vector unsigned char vc2 ,
20
58
vector unsigned char vc3 , vector unsigned char vc4 , unsigned char * resp ) {
21
59
__vector_quad vq = * ((__vector_quad * )vqp );
@@ -37,6 +75,36 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
37
75
// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
38
76
// CHECK-BE-NEXT: ret void
39
77
//
78
+ // CHECK-LE-NOOPT-LABEL: @test2(
79
+ // CHECK-LE-NOOPT-NEXT: entry:
80
+ // CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
81
+ // CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
82
+ // CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
83
+ // CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
84
+ // CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
85
+ // CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
86
+ // CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
87
+ // CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32
88
+ // CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
89
+ // CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
90
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
91
+ // CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
92
+ // CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
93
+ // CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
94
+ // CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
95
+ // CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
96
+ // CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
97
+ // CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
98
+ // CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
99
+ // CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
100
+ // CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
101
+ // CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
102
+ // CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP6]], ptr [[RES]], align 64
103
+ // CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
104
+ // CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
105
+ // CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
106
+ // CHECK-LE-NOOPT-NEXT: ret void
107
+ //
40
108
void test2 (unsigned char * vqp , unsigned char * vpp , vector unsigned char vc1 ,
41
109
vector unsigned char vc2 , unsigned char * resp ) {
42
110
__vector_quad vq = * ((__vector_quad * )vqp );
0 commit comments