[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair() #108606

lei137 · 2024-09-13T17:04:05Z

Fixes #107229

llvmbot · 2024-09-13T17:04:37Z

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Lei Huang (lei137)

Changes

Fixes #107229

Full diff: https://github.com/llvm/llvm-project/pull/108606.diff

3 Files Affected:

(modified) clang/lib/CodeGen/CGBuiltin.cpp (+1-1)
(modified) clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c (+1-1)
(modified) clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c (+4-4)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a52e880a764252..5db5ad5470c366 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18197,7 +18197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       CallOps.push_back(Ops[i]);
     llvm::Function *F = CGM.getIntrinsic(ID);
     Value *Call = Builder.CreateCall(F, CallOps);
-    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
+    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
   }
 
   case PPC::BI__builtin_ppc_compare_and_swap:
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
index 8a2bc93dd6cd0a..cdbfdd6b7975ad 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
@@ -99,7 +99,7 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
 // CHECK-LE-NOOPT-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
 // CHECK-LE-NOOPT-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
 // CHECK-LE-NOOPT-NEXT:    [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
-// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP6]], ptr [[RES]], align 64
+// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP6]], ptr [[RES]], align 32
 // CHECK-LE-NOOPT-NEXT:    [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
 // CHECK-LE-NOOPT-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
 // CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
index 39c040967dc0c3..b18bb3ad050aca 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
@@ -85,11 +85,11 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 32
 // CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]])
-// CHECK-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 32
 // CHECK-NEXT:    [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
 // CHECK-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])
@@ -118,11 +118,11 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
 // CHECK-BE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK-BE-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 32
 // CHECK-BE-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]])
-// CHECK-BE-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 32
 // CHECK-BE-NEXT:    [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
 // CHECK-BE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])

llvmbot · 2024-09-13T17:04:37Z

@llvm/pr-subscribers-backend-powerpc

Author: Lei Huang (lei137)

Changes

Fixes #107229

Full diff: https://github.com/llvm/llvm-project/pull/108606.diff

3 Files Affected:

(modified) clang/lib/CodeGen/CGBuiltin.cpp (+1-1)
(modified) clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c (+1-1)
(modified) clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c (+4-4)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a52e880a764252..5db5ad5470c366 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18197,7 +18197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       CallOps.push_back(Ops[i]);
     llvm::Function *F = CGM.getIntrinsic(ID);
     Value *Call = Builder.CreateCall(F, CallOps);
-    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
+    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
   }
 
   case PPC::BI__builtin_ppc_compare_and_swap:
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
index 8a2bc93dd6cd0a..cdbfdd6b7975ad 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
@@ -99,7 +99,7 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
 // CHECK-LE-NOOPT-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
 // CHECK-LE-NOOPT-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
 // CHECK-LE-NOOPT-NEXT:    [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
-// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP6]], ptr [[RES]], align 64
+// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP6]], ptr [[RES]], align 32
 // CHECK-LE-NOOPT-NEXT:    [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
 // CHECK-LE-NOOPT-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
 // CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
index 39c040967dc0c3..b18bb3ad050aca 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c
@@ -85,11 +85,11 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 32
 // CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]])
-// CHECK-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 32
 // CHECK-NEXT:    [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
 // CHECK-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])
@@ -118,11 +118,11 @@ void testVQLocal(int *ptr, vector unsigned char vc) {
 // CHECK-BE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK-BE-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT:    store <256 x i1> [[TMP5]], ptr [[VP2]], align 32
 // CHECK-BE-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]])
-// CHECK-BE-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 64
+// CHECK-BE-NEXT:    store <256 x i1> [[TMP8]], ptr [[VP2]], align 32
 // CHECK-BE-NEXT:    [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
 // CHECK-BE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
 // CHECK-BE-NEXT:    [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]])

nikic · 2024-09-14T07:42:52Z

clang/lib/CodeGen/CGBuiltin.cpp

@@ -18197,7 +18197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
      CallOps.push_back(Ops[i]);
    llvm::Function *F = CGM.getIntrinsic(ID);
    Value *Call = Builder.CreateCall(F, CallOps);
-    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
+    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());


Suggested change

return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());

return Builder.CreateStore(Call, Ops[0]);

Should work, I think? No need to use CreateAlignedStore if you want a naturally aligned store.

I'd prefer to just explicitly write the expected alignment here, though; LLVM DataLayout rules for alignment can be a bit strange.

I'd prefer to just explicitly write the expected alignment here,

@efriedma-quic Do you mean like hard code the expected alignment?

Currently CreateAlignedStore(Call, Ops[0], MaybeAlign()); calls:

StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align,
                              bool isVolatile = false) {
  if (!Align) {
    const DataLayout &DL = BB->getDataLayout();
    Align = DL.getABITypeAlign(Val->getType());
  }
  return Insert(new StoreInst(Val, Ptr, isVolatile, *Align));
}

and gets the proper ABI alignment for the given type. Since this section of code processes types that are either 32-byte or 64-byte aligned, we can't hard-code a single alignment here.

Oh, didn't realize there were multiple different types coming through here. It's not worth it to duplicate the implementation of CreateStore... @nikic suggestion is fine, then.

@nikic Using CreateStore(Value, Address) will require extra code to convert Ops[0] to type Address.

Ah, it looks like CGBuilder doesn't directly re-export this IRBuilder API, it's called CreateDefaultAlignedStore() instead.

I see a FIXME for this function, do we still want to use it?

// FIXME: these "default-aligned" APIs should be removed,
// but I don't feel like fixing all the builtin code right now.
llvm::StoreInst *CreateDefaultAlignedStore(llvm::Value *Val,
                                           llvm::Value *Addr,
                                           bool IsVolatile = false)

Since these need to be aligned stores, maybe it's more clear to just use CreateAlignedStore()?
The patch that added this FIXME:

commit 7f416cc426384ad1f891addb61d93e7ca1ffa0f2 Author: John McCall <[email protected]> Date: Tue Sep 8 08:05:57 2015 +0000

Explicitly asks:

I partially punted on applying this work to CGBuiltin. Please do not add more uses of the CreateDefaultAligned{Load,Store} APIs; they will be going away eventually. llvm-svn: 246985

It is an old patch so I am not sure how much of that is still true...

Oh, right, I forgot we were trying to get rid of those at one point... that's probably never happening for LLVM in general, but maybe for clang it'll get finished at some point.

Looking at the code again, it's calling EmitPointerWithAlignment, so you can just take the alignment from that. Well, actually, it looks like there's a bug where we emit the first argument twice, but with that fixed, you should be able to do that.

nikic

LGTM, I don't think exactly what the builder call looks like here is important...

Fixes llvm#107229

nemanjai · 2024-09-23T17:45:55Z

Will this affect the code that is generated for ‘__vector_quad’ values? There is no direct way to load and store those so they will use pair loads and stores.

lei137 · 2024-09-23T19:21:01Z

Will this affect the code that is generated for ‘__vector_quad’ values? There is no direct way to load and store those so they will use pair loads and stores.

Not AFAIK. The change will get the alignment from the type given. The LIT test being updated also has allocas and stores of vector quads. The alignment for those instructions stayed at 64 bytes.

lei137 self-assigned this Sep 13, 2024

llvmbot added clang Clang issues not falling into any other category backend:PowerPC clang:codegen IR generation bugs: mangling, exceptions, etc. labels Sep 13, 2024

lei137 requested review from nikic and stefanp-synopsys September 13, 2024 17:04

lei137 requested a review from amy-kwan September 13, 2024 17:04

nikic reviewed Sep 14, 2024

View reviewed changes

lei137 requested review from nikic and efriedma-quic September 17, 2024 21:10

nikic approved these changes Sep 19, 2024

View reviewed changes

[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair()

7d0d507

Fixes llvm#107229

lei137 force-pushed the lei/alignIssue branch from 2c26898 to 7d0d507 Compare September 23, 2024 17:30

lei137 merged commit 62f3eae into llvm:main Sep 23, 2024
4 of 5 checks passed

lei137 deleted the lei/alignIssue branch September 23, 2024 17:31

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair() #108606

[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair() #108606

Uh oh!

lei137 commented Sep 13, 2024

Uh oh!

llvmbot commented Sep 13, 2024 •

edited

Loading

Uh oh!

llvmbot commented Sep 13, 2024

Uh oh!

nikic Sep 14, 2024

Uh oh!

efriedma-quic Sep 16, 2024

Uh oh!

lei137 Sep 17, 2024

Uh oh!

efriedma-quic Sep 17, 2024

Uh oh!

lei137 Sep 17, 2024

Uh oh!

nikic Sep 17, 2024

Uh oh!

lei137 Sep 17, 2024

Uh oh!

efriedma-quic Sep 17, 2024

Uh oh!

nikic left a comment

Uh oh!

Uh oh!

nemanjai commented Sep 23, 2024

Uh oh!

lei137 commented Sep 23, 2024

Uh oh!

Uh oh!

	return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
	return Builder.CreateStore(Call, Ops[0]);

[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair() #108606

[PowerPC] Fix incorrect store alignment for __builtin_vsx_build_pair() #108606

Uh oh!

Conversation

lei137 commented Sep 13, 2024

Uh oh!

llvmbot commented Sep 13, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 13, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

nemanjai commented Sep 23, 2024

Uh oh!

lei137 commented Sep 23, 2024

Uh oh!

Uh oh!

llvmbot commented Sep 13, 2024 •

edited

Loading