Skip to content

Commit 1ceaec3

Browse files
author
Kai Luo
committed
[PowerPC][altivec] Optimize codegen of vec_promote
According to https://www.ibm.com/docs/en/xl-c-and-cpp-linux/16.1.1?topic=functions-vec-promote, elements not specified by the input index argument are undefined, so we don't need to set these elements to zero. Reviewed By: nemanjai, #powerpc. Differential Revision: https://reviews.llvm.org/D158487
1 parent eff105b commit 1ceaec3

File tree

3 files changed

+315
-10
lines changed

3 files changed

+315
-10
lines changed

clang/lib/Headers/altivec.h

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
1464714647

1464814648
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
1464914649
int __b) {
14650-
vector signed char __res = (vector signed char)(0);
14650+
const vector signed char __zero = (vector signed char)0;
14651+
vector signed char __res =
14652+
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
14653+
-1, -1, -1, -1, -1, -1, -1, -1);
1465114654
__res[__b & 0xf] = __a;
1465214655
return __res;
1465314656
}
1465414657

1465514658
static __inline__ vector unsigned char __ATTRS_o_ai
1465614659
vec_promote(unsigned char __a, int __b) {
14657-
vector unsigned char __res = (vector unsigned char)(0);
14660+
const vector unsigned char __zero = (vector unsigned char)(0);
14661+
vector unsigned char __res =
14662+
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
14663+
-1, -1, -1, -1, -1, -1, -1, -1);
1465814664
__res[__b & 0xf] = __a;
1465914665
return __res;
1466014666
}
1466114667

1466214668
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
14663-
vector short __res = (vector short)(0);
14669+
const vector short __zero = (vector short)(0);
14670+
vector short __res =
14671+
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
1466414672
__res[__b & 0x7] = __a;
1466514673
return __res;
1466614674
}
1466714675

1466814676
static __inline__ vector unsigned short __ATTRS_o_ai
1466914677
vec_promote(unsigned short __a, int __b) {
14670-
vector unsigned short __res = (vector unsigned short)(0);
14678+
const vector unsigned short __zero = (vector unsigned short)(0);
14679+
vector unsigned short __res =
14680+
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
1467114681
__res[__b & 0x7] = __a;
1467214682
return __res;
1467314683
}
1467414684

1467514685
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
14676-
vector int __res = (vector int)(0);
14686+
const vector int __zero = (vector int)(0);
14687+
vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
1467714688
__res[__b & 0x3] = __a;
1467814689
return __res;
1467914690
}
1468014691

1468114692
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
1468214693
int __b) {
14683-
vector unsigned int __res = (vector unsigned int)(0);
14694+
const vector unsigned int __zero = (vector unsigned int)(0);
14695+
vector unsigned int __res =
14696+
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
1468414697
__res[__b & 0x3] = __a;
1468514698
return __res;
1468614699
}
1468714700

1468814701
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
14689-
vector float __res = (vector float)(0);
14702+
const vector float __zero = (vector float)(0);
14703+
vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
1469014704
__res[__b & 0x3] = __a;
1469114705
return __res;
1469214706
}
1469314707

1469414708
#ifdef __VSX__
1469514709
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
14696-
vector double __res = (vector double)(0);
14710+
const vector double __zero = (vector double)(0);
14711+
vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
1469714712
__res[__b & 0x1] = __a;
1469814713
return __res;
1469914714
}
1470014715

1470114716
static __inline__ vector signed long long __ATTRS_o_ai
1470214717
vec_promote(signed long long __a, int __b) {
14703-
vector signed long long __res = (vector signed long long)(0);
14718+
const vector signed long long __zero = (vector signed long long)(0);
14719+
vector signed long long __res =
14720+
__builtin_shufflevector(__zero, __zero, -1, -1);
1470414721
__res[__b & 0x1] = __a;
1470514722
return __res;
1470614723
}
1470714724

1470814725
static __inline__ vector unsigned long long __ATTRS_o_ai
1470914726
vec_promote(unsigned long long __a, int __b) {
14710-
vector unsigned long long __res = (vector unsigned long long)(0);
14727+
const vector unsigned long long __zero = (vector unsigned long long)(0);
14728+
vector unsigned long long __res =
14729+
__builtin_shufflevector(__zero, __zero, -1, -1);
1471114730
__res[__b & 0x1] = __a;
1471214731
return __res;
1471314732
}

clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,35 +2232,45 @@ res_vuc = vec_xxsldwi(vuc, vuc, 1);
22322232

22332233
res_vd = vec_promote(d, 0);
22342234
// CHECK: store <2 x double> zeroinitializer
2235+
// CHECK: store <2 x double> poison
22352236
// CHECK: insertelement <2 x double>
22362237
// CHECK-LE: store <2 x double> zeroinitializer
2238+
// CHECK-LE: store <2 x double> poison
22372239
// CHECK-LE: insertelement <2 x double>
22382240

22392241
res_vsll = vec_promote(sll, 0);
22402242
// CHECK: store <2 x i64> zeroinitializer
2243+
// CHECK: store <2 x i64> poison
22412244
// CHECK: insertelement <2 x i64>
22422245
// CHECK-LE: store <2 x i64> zeroinitializer
2246+
// CHECK-LE: store <2 x i64> poison
22432247
// CHECK-LE: insertelement <2 x i64>
22442248

22452249
res_vull = vec_promote(ull, 0);
22462250
// CHECK: store <2 x i64> zeroinitializer
2251+
// CHECK: store <2 x i64> poison
22472252
// CHECK: insertelement <2 x i64>
22482253
// CHECK-LE: store <2 x i64> zeroinitializer
2254+
// CHECK-LE: store <2 x i64> poison
22492255
// CHECK-LE: insertelement <2 x i64>
22502256

22512257
res_vsc = vec_promote(asc[0], 8);
22522258
// CHECK: store <16 x i8> zeroinitializer
2259+
// CHECK: store <16 x i8> poison
22532260
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
22542261
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
22552262
// CHECK-LE: store <16 x i8> zeroinitializer
2263+
// CHECK-LE: store <16 x i8> poison
22562264
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
22572265
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
22582266

22592267
res_vuc = vec_promote(auc[0], 8);
22602268
// CHECK: store <16 x i8> zeroinitializer
2269+
// CHECK: store <16 x i8> poison
22612270
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
22622271
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
22632272
// CHECK-LE: store <16 x i8> zeroinitializer
2273+
// CHECK-LE: store <16 x i8> poison
22642274
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
22652275
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
22662276
}

0 commit comments

Comments
 (0)