-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][SME] Conditionally do smstart/smstop #77113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
fbcac41
[AArch64][SME] Conditionally do smstart/smstop
MDevereau 3f08c6f
fix whitespace + test name
MDevereau 3c5d605
Add multi basic block test
MDevereau b1fdadb
Rename temporary test name
MDevereau ab9d820
Fix inverted cbz condition
MDevereau 9b79d65
Add nounwind to tests & rename getPStateSM
MDevereau 7e2e8c8
Remove unneeded code and change comments
MDevereau aad0bbc
rename sme-streaming-body-streaming-compatible.ll
MDevereau File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; External callees, distinguished only by their streaming-mode attribute:
; no attribute, "aarch64_pstate_sm_enabled", and "aarch64_pstate_sm_compatible".
; @streaming_callee is declared here but is not called by any of the functions
; visible in this part of the file.
declare void @normal_callee(); | ||
declare void @streaming_callee() "aarch64_pstate_sm_enabled"; | ||
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; | ||
|
||
; A function that is both "aarch64_pstate_sm_compatible" and
; "aarch64_pstate_sm_body", with no calls in its body; it just returns 0.0.
; The expected output spills d8-d15 and x30, calls __arm_sme_state, and keeps
; bit 0 of the x0 result in x8. Both the "smstart sm" on entry and the
; "smstop sm" before the epilogue are skipped when that bit is set.
; NOTE(review): bit 0 is presumably the PSTATE.SM value on entry - confirm
; against the SME ABI description of __arm_sme_state.
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { | ||
; CHECK-LABEL: sm_body_sm_compatible_simple: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill | ||
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill | ||
; CHECK-NEXT: bl __arm_sme_state | ||
; CHECK-NEXT: and x8, x0, #0x1 | ||
; CHECK-NEXT: tbnz w8, #0, .LBB0_2 | ||
; CHECK-NEXT: // %bb.1: | ||
; CHECK-NEXT: smstart sm | ||
; CHECK-NEXT: .LBB0_2: | ||
; CHECK-NEXT: tbnz w8, #0, .LBB0_4 | ||
; CHECK-NEXT: // %bb.3: | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: .LBB0_4: | ||
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload | ||
; CHECK-NEXT: fmov s0, wzr | ||
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload | ||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
ret float zeroinitializer | ||
} | ||
|
||
; Same attribute pair as the simple case, but the body calls @normal_callee,
; which carries no streaming attribute.
; In the expected output the masked __arm_sme_state result is held in x19
; (spilled and reloaded together with x30) rather than a scratch register, and
; it is tested again after the call. The call itself is wrapped in an
; unconditional "smstop sm" / "smstart sm" pair, while the entry smstart and
; the exit smstop remain conditional on bit 0 of x19.
define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { | ||
; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill | ||
; CHECK-NEXT: bl __arm_sme_state | ||
; CHECK-NEXT: and x19, x0, #0x1 | ||
; CHECK-NEXT: tbnz w19, #0, .LBB1_2 | ||
; CHECK-NEXT: // %bb.1: | ||
; CHECK-NEXT: smstart sm | ||
; CHECK-NEXT: .LBB1_2: | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: bl normal_callee | ||
; CHECK-NEXT: smstart sm | ||
; CHECK-NEXT: tbnz w19, #0, .LBB1_4 | ||
; CHECK-NEXT: // %bb.3: | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: .LBB1_4: | ||
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
call void @normal_callee() | ||
ret void | ||
} | ||
|
||
; Multi-basic-block variant: the entry block branches on %x == 0 to one of two
; blocks, each making a different call before a shared return block.
;   if.then (%x == 0): calls @normal_callee; the expected output wraps the call
;                      in an unconditional "smstop sm" / "smstart sm" pair.
;   if.else (%x != 0): calls @streaming_compatible_callee; no mode switch is
;                      expected around the call.
; The conditional "smstart sm" is expected once in the entry block, and each
; path ends with its own conditional "smstop sm" plus epilogue (the RUN line
; passes -enable-tail-merge=false). Both IR "tail call"s appear as plain "bl"
; in the expected output.
; NOTE(review): %x is copied to w8 before "bl __arm_sme_state" and tested with
; cbz afterwards, which relies on that routine preserving x8 - confirm against
; the SME ABI.
define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { | ||
; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill | ||
; CHECK-NEXT: mov w8, w0 | ||
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill | ||
; CHECK-NEXT: bl __arm_sme_state | ||
; CHECK-NEXT: and x19, x0, #0x1 | ||
; CHECK-NEXT: tbnz w19, #0, .LBB2_2 | ||
; CHECK-NEXT: // %bb.1: // %entry | ||
; CHECK-NEXT: smstart sm | ||
; CHECK-NEXT: .LBB2_2: // %entry | ||
; CHECK-NEXT: cbz w8, .LBB2_6 | ||
; CHECK-NEXT: // %bb.3: // %if.else | ||
; CHECK-NEXT: bl streaming_compatible_callee | ||
; CHECK-NEXT: tbnz w19, #0, .LBB2_5 | ||
; CHECK-NEXT: // %bb.4: // %if.else | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: .LBB2_5: // %if.else | ||
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
; CHECK-NEXT: .LBB2_6: // %if.then | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: bl normal_callee | ||
; CHECK-NEXT: smstart sm | ||
; CHECK-NEXT: tbnz w19, #0, .LBB2_8 | ||
; CHECK-NEXT: // %bb.7: // %if.then | ||
; CHECK-NEXT: smstop sm | ||
; CHECK-NEXT: .LBB2_8: // %if.then | ||
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload | ||
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload | ||
; CHECK-NEXT: ret | ||
entry: | ||
%cmp = icmp eq i32 %x, 0 | ||
br i1 %cmp, label %if.then, label %if.else | ||
|
||
if.then: ; preds = %entry | ||
tail call void @normal_callee() | ||
br label %return | ||
|
||
if.else: ; preds = %entry | ||
tail call void @streaming_compatible_callee() | ||
br label %return | ||
|
||
return: ; preds = %if.else, %if.then | ||
ret void | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Can you add a comment describing what this Register holds and when this value is defined?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done