Skip to content

Commit ef90657

Browse files
committed
[PowerPC] Fix vec_add for 64-bit on pre-Power7 subtargets
The shift of the carry was actually incorrect.
1 parent f0dd6fa commit ef90657

File tree

2 files changed

+13
-15
lines changed

2 files changed

+13
-15
lines changed

clang/lib/Headers/altivec.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,16 +312,20 @@ vec_add_u128(vector unsigned char __a, vector unsigned char __b) {
312312
#elif defined(__VSX__)
313313
static __inline__ vector signed long long __ATTRS_o_ai
314314
vec_add(vector signed long long __a, vector signed long long __b) {
315+
#ifdef __LITTLE_ENDIAN__
316+
// Little-endian systems on CPUs prior to Power8 don't really exist
317+
// so scalarizing is fine.
318+
return __a + __b;
319+
#else
315320
vector unsigned int __res =
316321
(vector unsigned int)__a + (vector unsigned int)__b;
317322
vector unsigned int __carry = __builtin_altivec_vaddcuw(
318323
(vector unsigned int)__a, (vector unsigned int)__b);
319-
#ifdef __LITTLE_ENDIAN__
320-
__carry = __builtin_shufflevector(__carry, __carry, 3, 0, 1, 2);
321-
#else
322-
__carry = __builtin_shufflevector(__carry, __carry, 1, 2, 3, 0);
323-
#endif
324+
__carry = __builtin_shufflevector((vector unsigned char)__carry,
325+
(vector unsigned char)__carry, 0, 0, 0, 7,
326+
0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0);
324327
return (vector signed long long)(__res + __carry);
328+
#endif
325329
}
326330

327331
static __inline__ vector unsigned long long __ATTRS_o_ai

clang/test/CodeGen/builtins-ppc-vsx.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2319,21 +2319,15 @@ void test_p8overloads_backwards_compat() {
23192319
res_vsll = vec_add(vsll, vsll);
23202320
// CHECK: add <4 x i32>
23212321
// CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw
2322-
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
2322+
// CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i32> <i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0>
23232323
// CHECK: add <4 x i32>
2324-
// CHECK-LE: add <4 x i32>
2325-
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw
2326-
// CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
2327-
// CHECK-LE: add <4 x i32>
2324+
// CHECK-LE: add <2 x i64>
23282325
res_vull = vec_add(vull, vull);
23292326
// CHECK: add <4 x i32>
23302327
// CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw
2331-
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
2328+
// CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i32> <i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0>
23322329
// CHECK: add <4 x i32>
2333-
// CHECK-LE: add <4 x i32>
2334-
// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw
2335-
// CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
2336-
// CHECK-LE: add <4 x i32>
2330+
// CHECK-LE: add <2 x i64>
23372331
dummy();
23382332
// CHECK: call void @dummy()
23392333
// CHECK-LE: call void @dummy()

0 commit comments

Comments
 (0)