Skip to content

[NVPTX] Add baseline srl-shl-zext tests from #138290 #139878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 14, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented May 14, 2025

No description provided.

@RKSimon RKSimon marked this pull request as ready for review May 14, 2025 12:16
@llvmbot
Copy link
Member

llvmbot commented May 14, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/139878.diff

1 File Affected:

  • (added) llvm/test/CodeGen/NVPTX/shift-opt.ll (+192)
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..5f5ad831cb148
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+define i64 @test_or(i64 %x, i32 %y) {
+; CHECK-LABEL: test_or(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_or_param_0];
+; CHECK-NEXT:    ld.param.b32 %r1, [test_or_param_1];
+; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
+; CHECK-NEXT:    or.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+define i64 @test_xor(i64 %x, i32 %y) {
+; CHECK-LABEL: test_xor(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_xor_param_0];
+; CHECK-NEXT:    ld.param.b32 %r1, [test_xor_param_1];
+; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
+; CHECK-NEXT:    xor.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = xor i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+define i64 @test_and(i64 %x, i32 %y) {
+; CHECK-LABEL: test_and(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_and_param_0];
+; CHECK-NEXT:    ld.param.b32 %r1, [test_and_param_1];
+; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
+; CHECK-NEXT:    and.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = and i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+; x, y - vectors
+define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
+; CHECK-LABEL: test_vec(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<9>;
+; CHECK-NEXT:    .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [test_vec_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [test_vec_param_1];
+; CHECK-NEXT:    and.b32 %r3, %r2, 16711935;
+; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
+; CHECK-NEXT:    shl.b16 %rs3, %rs2, 5;
+; CHECK-NEXT:    shl.b16 %rs4, %rs1, 5;
+; CHECK-NEXT:    mov.b32 %r4, {%rs4, %rs3};
+; CHECK-NEXT:    or.b32 %r5, %r1, %r4;
+; CHECK-NEXT:    mov.b32 {%rs5, %rs6}, %r5;
+; CHECK-NEXT:    shr.u16 %rs7, %rs6, 5;
+; CHECK-NEXT:    shr.u16 %rs8, %rs5, 5;
+; CHECK-NEXT:    mov.b32 %r6, {%rs8, %rs7};
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT:    ret;
+  %ext = zext <2 x i8> %y to <2 x i16>
+  %shl = shl <2 x i16> %ext, splat(i16 5)
+  %or = or <2 x i16> %x, %shl
+  %srl = lshr <2 x i16> %or, splat(i16 5)
+  ret <2 x i16> %srl
+}
+
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: c1 > leadingzeros(zext(y)).
+define i64 @test_negative_c(i64 %x, i32 %y) {
+; CHECK-LABEL: test_negative_c(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_negative_c_param_0];
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_negative_c_param_1];
+; CHECK-NEXT:    shl.b64 %rd3, %rd2, 33;
+; CHECK-NEXT:    or.b64 %rd4, %rd1, %rd3;
+; CHECK-NEXT:    shr.u64 %rd5, %rd4, 33;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd5;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 33
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 33
+  ret i64 %srl
+}
+
+declare void @use(i64)
+
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "or"
+define i64 @test_negative_use_lop(i64 %x, i32 %y) {
+; CHECK-LABEL: test_negative_use_lop(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_negative_use_lop_param_0];
+; CHECK-NEXT:    ld.param.b32 %r1, [test_negative_use_lop_param_1];
+; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
+; CHECK-NEXT:    or.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.b64 [param0], %rd3;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    use,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  call void @use(i64 %or)
+  ret i64 %srl
+}
+
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "shl"
+define i64 @test_negative_use_shl(i64 %x, i32 %y) {
+; CHECK-LABEL: test_negative_use_shl(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_negative_use_shl_param_0];
+; CHECK-NEXT:    ld.param.b32 %r1, [test_negative_use_shl_param_1];
+; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
+; CHECK-NEXT:    or.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.b64 [param0], %rd2;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    use,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 1
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
+; CHECK-NEXT:    ret;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  call void @use(i64 %shl)
+  ret i64 %srl
+}

@RKSimon RKSimon changed the title [NVPTX] Add baseline tests from #138290 [NVPTX] Add baseline srl-shl-zext tests from #138290 May 14, 2025
@RKSimon RKSimon merged commit fb07683 into llvm:main May 14, 2025
13 of 15 checks passed
@RKSimon RKSimon deleted the nvptx-shift-bitlogic-combines branch May 14, 2025 15:02
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants