Skip to content

Commit d56734f

Browse files
KorovinVlad authored and
igcbot committed
LSC 16c32b atomic fix
.
1 parent c68c40d commit d56734f

File tree

2 files changed

+46
-4
lines changed

2 files changed

+46
-4
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLoadStoreLowering.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,10 +1181,19 @@ Instruction *GenXLoadStoreLowering::createLSCAtomicImpl(
11811181
ElementSize == LSC_DATA_SIZE_64b);
11821182

11831183
if (ElementSize == LSC_DATA_SIZE_16c32b) {
1184-
Src0 = Builder.CreateBitCast(Src0, Builder.getInt16Ty());
1185-
Src0 = Builder.CreateZExt(Src0, Builder.getInt32Ty());
1186-
Src1 = Builder.CreateBitCast(Src1, Builder.getInt16Ty());
1187-
Src1 = Builder.CreateZExt(Src1, Builder.getInt32Ty());
1184+
// We must preserve undef operands but
1185+
// zext/sext casts make them zero.
1186+
if (!isa<UndefValue>(Src0)) {
1187+
Src0 = Builder.CreateBitCast(Src0, Builder.getInt16Ty());
1188+
Src0 = Builder.CreateZExt(Src0, Builder.getInt32Ty());
1189+
} else
1190+
Src0 = UndefValue::get(Builder.getInt32Ty());
1191+
1192+
if (!isa<UndefValue>(Src1)) {
1193+
Src1 = Builder.CreateBitCast(Src1, Builder.getInt16Ty());
1194+
Src1 = Builder.CreateZExt(Src1, Builder.getInt32Ty());
1195+
} else
1196+
Src1 = UndefValue::get(Builder.getInt32Ty());
11881197
}
11891198

11901199
auto AddrSize = IID == vc::InternalIntrinsic::lsc_atomic_ugm
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt %use_old_pass_manager% -enable-debugify -GenXLoadStoreLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -enable-ldst-lowering=true -mattr=+ocl_runtime -S < %s 2>&1 | FileCheck --check-prefix=CHECK-LSC %s
10+
11+
; Test case: atomicrmw add of the constant i16 1 on a pointer in addrspace(1),
; using syncscope("subgroup") and seq_cst ordering.
; The FileCheck patterns in the body expect, in order: a ptrtoint of %ptr to
; i64, a bitcast of that address to <1 x i64>, a fence call, a single
; lsc.atomic.ugm call whose three trailing <1 x i32> operands are all undef,
; a second fence call, a bitcast of the <1 x i32> result to i32, and a trunc
; back to i16 that produces %res.
; NOTE(review): the second argument of the atomic call (i8 8) is presumably the
; opcode selected for "add 1" - confirm against the LSC opcode enum.
; NOTE(review): the last fence argument (i8 0) presumably encodes the subgroup
; scope - confirm.
define i16 @inc_i16(i16 addrspace(1)* %ptr) {
12+
; CHECK-LSC: [[INC_ADDR:%[^ ]+]] = ptrtoint i16 addrspace(1)* %ptr to i64
13+
; CHECK-LSC: [[INC_VADDR:%[^ ]+]] = bitcast i64 [[INC_ADDR]] to <1 x i64>
14+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
15+
; CHECK-LSC: [[INC_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.ugm.v1i32.v1i1.v2i8.v1i64(<1 x i1> <i1 true>, i8 8, i8 3, i8 6, <2 x i8> zeroinitializer, i64 0, <1 x i64> [[INC_VADDR]], i16 1, i32 0, <1 x i32> undef, <1 x i32> undef, <1 x i32> undef)
16+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
17+
; CHECK-LSC: [[INC_BCAST:%[^ ]+]] = bitcast <1 x i32> [[INC_VRES]] to i32
18+
; CHECK-LSC: %res = trunc i32 [[INC_BCAST]] to i16
19+
%res = atomicrmw add i16 addrspace(1)* %ptr, i16 1 syncscope("subgroup") seq_cst
20+
ret i16 %res
21+
}
22+
23+
; Test case: atomicrmw sub of the constant i16 1 on a pointer in addrspace(1),
; using syncscope("workgroup") and seq_cst ordering.
; The FileCheck patterns in the body expect, in order: a ptrtoint of %ptr to
; i64, a bitcast of that address to <1 x i64>, a fence call, a single
; lsc.atomic.ugm call whose three trailing <1 x i32> operands are all undef,
; a second fence call, a bitcast of the <1 x i32> result to i32, and a trunc
; back to i16 that produces %res.
; NOTE(review): the second argument of the atomic call (i8 9) is presumably the
; opcode selected for "sub 1" - confirm against the LSC opcode enum.
; NOTE(review): the last fence argument (i8 1) presumably encodes the workgroup
; scope - confirm.
define i16 @dec_i16(i16 addrspace(1)* %ptr) {
24+
; CHECK-LSC: [[DEC_ADDR:%[^ ]+]] = ptrtoint i16 addrspace(1)* %ptr to i64
25+
; CHECK-LSC: [[DEC_VADDR:%[^ ]+]] = bitcast i64 [[DEC_ADDR]] to <1 x i64>
26+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
27+
; CHECK-LSC: [[DEC_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.ugm.v1i32.v1i1.v2i8.v1i64(<1 x i1> <i1 true>, i8 9, i8 3, i8 6, <2 x i8> zeroinitializer, i64 0, <1 x i64> [[DEC_VADDR]], i16 1, i32 0, <1 x i32> undef, <1 x i32> undef, <1 x i32> undef)
28+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
29+
; CHECK-LSC: [[DEC_BCAST:%[^ ]+]] = bitcast <1 x i32> [[DEC_VRES]] to i32
30+
; CHECK-LSC: %res = trunc i32 [[DEC_BCAST]] to i16
31+
%res = atomicrmw sub i16 addrspace(1)* %ptr, i16 1 syncscope("workgroup") seq_cst
32+
ret i16 %res
33+
}

0 commit comments

Comments
 (0)