-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PowerPC] 32-bit large code-model support for toc-data #85129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3346,11 +3346,13 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr | |
"#ADDIStocHA", | ||
[(set i32:$rD, | ||
(PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>; | ||
// Local Data Transform | ||
// TOC Data Transform AIX | ||
def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), | ||
"#ADDItoc", | ||
[(set i32:$rD, | ||
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; | ||
def ADDItocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, you defined a new pseudo and it will be lowered to the real instruction
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've added it to P10InstrResources.td and PPCMacroFusion.def. |
||
"#ADDItocL", []>; | ||
|
||
// Get Global (GOT) Base Register offset, from the word immediately preceding | ||
// the function label. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,14 @@ | |
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32 | ||
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64 | ||
|
||
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s \ | ||
; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK32LARGE | ||
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32LARGE | ||
|
||
; Global variables i and f have the toc-data attribute. | ||
; In the following functions, those writing to or reading from | ||
; variables i and f should use the toc-data access pattern. | ||
; All remaining variables should use the regular toc access sequence. | ||
@i = dso_local global i32 0, align 4 #0 | ||
@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 | ||
@f = dso_local local_unnamed_addr global float 0x4005BE76C0000000, align 4 #0 | ||
|
@@ -44,6 +52,16 @@ define dso_local void @write_int(i32 signext %in) { | |
; TEST64: la 4, i[TD](2) | ||
; TEST64-NEXT: stw 3, 0(4) | ||
|
||
; CHECK32LARGE: name: write_int | ||
; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i | ||
; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @i | ||
; CHECK32LARGE-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH2]] :: (store (s32) into @i) | ||
|
||
; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming stw | ||
; TEST32LARGE: .write_int: | ||
; TEST32LARGE: addis 4, i[TD]@u(2) | ||
; TEST32LARGE-NEXT: la 4, i[TD]@l(4) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not related to this patch, but there is a peephole opportunity here: we could fold the lower-part relocation `@l` into the consuming `stw`. Can we add a FIXME here? We are trying to improve the code generation for toc-data with the small code model in #76488, but it seems we also have the same opportunity for the large code model. |
||
; TEST32LARGE-NEXT: stw 3, 0(4) | ||
|
||
define dso_local i64 @read_ll() { | ||
entry: | ||
|
@@ -70,6 +88,15 @@ define dso_local i64 @read_ll() { | |
; TEST64: ld 3, L..C0(2) | ||
; TEST64-NEXT: ld 3, 0(3) | ||
|
||
; CHECK32LARGE: name: read_ll | ||
; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @ll | ||
; CHECK32LARGE: LWZtocL @ll, killed %[[SCRATCH1]] :: (load (s32) from got) | ||
|
||
; TEST32LARGE: .read_ll: | ||
; TEST32LARGE: addis 3, L..C0@u(2) | ||
; TEST32LARGE-NEXT: lwz 4, L..C0@l(3) | ||
; TEST32LARGE-NEXT: lwz 3, 0(4) | ||
; TEST32LARGE-NEXT: lwz 4, 4(4) | ||
|
||
define dso_local float @read_float() { | ||
entry: | ||
|
@@ -96,6 +123,16 @@ define dso_local float @read_float() { | |
; TEST64: la 3, f[TD](2) | ||
; TEST64-NEXT: lfs 1, 0(3) | ||
|
||
; CHECK32LARGE: name: read_float | ||
; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @f | ||
; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @f | ||
; CHECK32LARGE-NEXT: LFS 0, killed %[[SCRATCH2]] :: (dereferenceable load (s32) from @f) | ||
|
||
; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming lfs | ||
; TEST32LARGE: .read_float: | ||
; TEST32LARGE: addis 3, f[TD]@u(2) | ||
; TEST32LARGE-NEXT: la 3, f[TD]@l(3) | ||
; TEST32LARGE-NEXT: lfs 1, 0(3) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here: the lower-part relocation `@l` could presumably be folded into the consuming `lfs`. |
||
|
||
define dso_local void @write_double(double %in) { | ||
entry: | ||
|
@@ -121,6 +158,14 @@ define dso_local void @write_double(double %in) { | |
; TEST64: ld 3, L..C1(2) | ||
; TEST64-NEXT: stfd 1, 0(3) | ||
|
||
; CHECK32LARGE: name: write_double | ||
; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @d | ||
; CHECK32LARGE: LWZtocL @d, killed %[[SCRATCH1]] :: (load (s32) from got) | ||
|
||
; TEST32LARGE: .write_double: | ||
; TEST32LARGE: addis 3, L..C1@u(2) | ||
; TEST32LARGE-NEXT: lwz 3, L..C1@l(3) | ||
; TEST32LARGE-NEXT: stfd 1, 0(3) | ||
|
||
define dso_local nonnull ptr @addr() { | ||
entry: | ||
|
@@ -144,6 +189,15 @@ define dso_local nonnull ptr @addr() { | |
; TEST64: .addr | ||
; TEST64: la 3, i[TD](2) | ||
|
||
; CHECK32LARGE: name: addr | ||
; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i | ||
; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc = ADDItocL killed %[[SCRATCH1]], @i | ||
; CHECK32LARGE-NEXT: $r3 = COPY %[[SCRATCH2]] | ||
|
||
; TEST32LARGE: .addr: | ||
; TEST32LARGE: addis 3, i[TD]@u(2) | ||
; TEST32LARGE-NEXT: la 3, i[TD]@l(3) | ||
|
||
; TEST32: .toc | ||
; TEST32: .tc ll[TC],ll[RW] | ||
; TEST32-NOT: .csect ll[TD] | ||
|
@@ -170,4 +224,17 @@ define dso_local nonnull ptr @addr() { | |
; TEST64-NEXT: .globl f[TD] | ||
; TEST64-NOT: .tc f[TD],f[RW] | ||
|
||
; TEST32LARGE: .toc | ||
; TEST32LARGE: .tc ll[TE],ll[RW] | ||
; TEST32LARGE-NOT: .csect ll[TD] | ||
; TEST32LARGE: .tc d[TE],d[RW] | ||
; TEST32LARGE-NOT: .csect d[TD],2 | ||
; TEST32LARGE: .csect i[TD],2 | ||
; TEST32LARGE-NEXT: .globl i[TD] | ||
; TEST32LARGE-NEXT: .align 2 | ||
; TEST32LARGE-NOT: .tc i[TE],i[RW] | ||
; TEST32LARGE: .csect f[TD],2 | ||
; TEST32LARGE-NEXT: .globl f[TD] | ||
; TEST32LARGE-NOT: .tc f[TE],f[RW] | ||
|
||
attributes #0 = { "toc-data" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At line 6195, for 32-bit, should we use `ADDItocL` instead of `ADDItocL8`? Although it seems there is no 32-bit case reaching that logic. (It uses `MVT::i64` at this phase, so on 32-bit it should fail?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Line 6195 is handling the Linux medium code model path. For tocdata 64-bit with large code model, I will have a follow-up patch to use ADDItocL8 (with the appropriate enhancements made in PPCAsmPrinter.cpp). ADDItocL8 will then be used on line 6175 above.
I plan to post the 64-bit support next once this 32-bit patch is approved and merged.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. Thanks for the explanation.