Skip to content

Commit 341be23

Browse files
KorovinVlad authored and igcbot committed
SLM fences are limited to .group scope.
1 parent 9eeeee1 commit 341be23

File tree

6 files changed

+14
-15
lines changed

6 files changed

+14
-15
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLoadStoreLowering.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,7 +1079,6 @@ LSC_SCOPE GenXLoadStoreLowering::getLSCFenceScope(Instruction *I) const {
10791079
case Device:
10801080
return ST->hasMultiTile() ? LSC_SCOPE_GPU : LSC_SCOPE_TILE;
10811081
case Workgroup:
1082-
return LSC_SCOPE_LOCAL;
10831082
case Subgroup:
10841083
case Invocation:
10851084
return LSC_SCOPE_GROUP;
@@ -1095,7 +1094,7 @@ LSC_SCOPE GenXLoadStoreLowering::getLSCFenceScope(Instruction *I) const {
10951094

10961095
return StringSwitch<LSC_SCOPE>(SyncScopeNames[ScopeID])
10971096
.Case("subgroup", LSC_SCOPE_GROUP)
1098-
.Case("workgroup", LSC_SCOPE_LOCAL)
1097+
.Case("workgroup", LSC_SCOPE_GROUP)
10991098
.Case("device", ST->hasMultiTile() ? LSC_SCOPE_GPU : LSC_SCOPE_TILE)
11001099
.Case("all_devices", LSC_SCOPE_GPUS)
11011100
.Default(LSC_SCOPE_GROUP);

IGC/VectorCompiler/test/LoadStoreLowering/atomic-float-local.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ define float @fmax_float(float addrspace(3)* %ptr, float %arg) {
2727
; CHECK: [[FMAX_ADDR:%[^ ]+]] = ptrtoint float addrspace(3)* %ptr to i32
2828
; CHECK: [[FMAX_VADDR:%[^ ]+]] = bitcast i32 [[FMAX_ADDR]] to <1 x i32>
2929
; CHECK: [[FMAX_VDATA:%[^ ]+]] = bitcast float %arg to <1 x float>
30-
; CHECK: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
30+
; CHECK: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
3131
; CHECK: [[FMAX_RESULT:%[^ ]+]] = call <1 x float> @llvm.vc.internal.lsc.atomic.slm.v1f32.v1i1.v2i8.v1i32(<1 x i1> <i1 true>, i8 22, i8 2, i8 3, <2 x i8> zeroinitializer, i32 0, <1 x i32> [[FMAX_VADDR]], i16 1, i32 0, <1 x float> [[FMAX_VDATA]], <1 x float> undef, <1 x float> undef)
3232
%res = call float @llvm.vc.internal.atomic.fmax.f32.p3f32.f32(float addrspace(3)* %ptr, i32 2, i32 256, float %arg) ; "Workgroup", "WorkgroupMemory"
33-
; CHECK: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
33+
; CHECK: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
3434
; CHECK: %res = bitcast <1 x float> [[FMAX_RESULT]] to float
3535
ret float %res
3636
}

IGC/VectorCompiler/test/LoadStoreLowering/atomic-int16-global.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ define i16 @inc_i16(i16 addrspace(1)* %ptr) {
2323
define i16 @dec_i16(i16 addrspace(1)* %ptr) {
2424
; CHECK-LSC: [[DEC_ADDR:%[^ ]+]] = ptrtoint i16 addrspace(1)* %ptr to i64
2525
; CHECK-LSC: [[DEC_VADDR:%[^ ]+]] = bitcast i64 [[DEC_ADDR]] to <1 x i64>
26-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
26+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
2727
; CHECK-LSC: [[DEC_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.ugm.v1i32.v1i1.v2i8.v1i64(<1 x i1> <i1 true>, i8 9, i8 3, i8 6, <2 x i8> zeroinitializer, i64 0, <1 x i64> [[DEC_VADDR]], i16 1, i32 0, <1 x i32> undef, <1 x i32> undef, <1 x i32> undef)
28-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
28+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
2929
; CHECK-LSC: [[DEC_BCAST:%[^ ]+]] = bitcast <1 x i32> [[DEC_VRES]] to i32
3030
; CHECK-LSC: %res = trunc i32 [[DEC_BCAST]] to i16
3131
%res = atomicrmw sub i16 addrspace(1)* %ptr, i16 1 syncscope("workgroup") seq_cst

IGC/VectorCompiler/test/LoadStoreLowering/atomic-int32-global.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ define i32 @sub_i32(i32 addrspace(1)* %ptr, i32 %arg) {
114114
; CHECK-LSC: [[SUB_ADDR:%[^ ]+]] = ptrtoint i32 addrspace(1)* %ptr to i64
115115
; CHECK-LSC: [[SUB_VADDR:%[^ ]+]] = bitcast i64 [[SUB_ADDR]] to <1 x i64>
116116
; CHECK-LSC: [[SUB_VDATA:%[^ ]+]] = bitcast i32 %arg to <1 x i32>
117-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
117+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
118118
; CHECK-LSC: [[SUB_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.ugm.v1i32.v1i1.v2i8.v1i64(<1 x i1> <i1 true>, i8 13, i8 3, i8 3, <2 x i8> zeroinitializer, i64 0, <1 x i64> [[SUB_VADDR]], i16 1, i32 0, <1 x i32> [[SUB_VDATA]], <1 x i32> undef, <1 x i32> undef)
119-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
119+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
120120
; CHECK-LSC: %res = bitcast <1 x i32> [[SUB_VRES]] to i32
121121
%res = atomicrmw sub i32 addrspace(1)* %ptr, i32 %arg syncscope("workgroup") acq_rel
122122
ret i32 %res
@@ -131,9 +131,9 @@ define i32 @dec_i32(i32 addrspace(1)* %ptr) {
131131
; CHECK: %res = bitcast <1 x i32> [[DEC_VRES]] to i32
132132
; CHECK-LSC: [[DEC_ADDR:%[^ ]+]] = ptrtoint i32 addrspace(1)* %ptr to i64
133133
; CHECK-LSC: [[DEC_VADDR:%[^ ]+]] = bitcast i64 [[DEC_ADDR]] to <1 x i64>
134-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
134+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
135135
; CHECK-LSC: [[DEC_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.ugm.v1i32.v1i1.v2i8.v1i64(<1 x i1> <i1 true>, i8 9, i8 3, i8 3, <2 x i8> zeroinitializer, i64 0, <1 x i64> [[DEC_VADDR]], i16 1, i32 0, <1 x i32> undef, <1 x i32> undef, <1 x i32> undef)
136-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 1)
136+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 0)
137137
; CHECK-LSC: %res = bitcast <1 x i32> [[DEC_VRES]] to i32
138138
%res = atomicrmw sub i32 addrspace(1)* %ptr, i32 1 syncscope("workgroup") seq_cst
139139
ret i32 %res

IGC/VectorCompiler/test/LoadStoreLowering/atomic-int32-local.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ define i32 @sub_i32(i32 addrspace(3)* %ptr, i32 %arg) {
114114
; CHECK-LSC: [[SUB_ADDR:%[^ ]+]] = ptrtoint i32 addrspace(3)* %ptr to i32
115115
; CHECK-LSC: [[SUB_VADDR:%[^ ]+]] = bitcast i32 [[SUB_ADDR]] to <1 x i32>
116116
; CHECK-LSC: [[SUB_VDATA:%[^ ]+]] = bitcast i32 %arg to <1 x i32>
117-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
117+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
118118
; CHECK-LSC: [[SUB_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.slm.v1i32.v1i1.v2i8.v1i32(<1 x i1> <i1 true>, i8 13, i8 2, i8 3, <2 x i8> zeroinitializer, i32 0, <1 x i32> [[SUB_VADDR]], i16 1, i32 0, <1 x i32> [[SUB_VDATA]], <1 x i32> undef, <1 x i32> undef)
119-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
119+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
120120
; CHECK-LSC: %res = bitcast <1 x i32> [[SUB_VRES]] to i32
121121
%res = atomicrmw sub i32 addrspace(3)* %ptr, i32 %arg syncscope("workgroup") acq_rel
122122
ret i32 %res
@@ -131,9 +131,9 @@ define i32 @dec_i32(i32 addrspace(3)* %ptr) {
131131
; CHECK: %res = bitcast <1 x i32> [[DEC_VRES]] to i32
132132
; CHECK-LSC: [[DEC_ADDR:%[^ ]+]] = ptrtoint i32 addrspace(3)* %ptr to i32
133133
; CHECK-LSC: [[DEC_VADDR:%[^ ]+]] = bitcast i32 [[DEC_ADDR]] to <1 x i32>
134-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
134+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
135135
; CHECK-LSC: [[DEC_VRES:%[^ ]+]] = call <1 x i32> @llvm.vc.internal.lsc.atomic.slm.v1i32.v1i1.v2i8.v1i32(<1 x i1> <i1 true>, i8 9, i8 2, i8 3, <2 x i8> zeroinitializer, i32 0, <1 x i32> [[DEC_VADDR]], i16 1, i32 0, <1 x i32> undef, <1 x i32> undef, <1 x i32> undef)
136-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
136+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
137137
; CHECK-LSC: %res = bitcast <1 x i32> [[DEC_VRES]] to i32
138138
%res = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 syncscope("workgroup") seq_cst
139139
ret i32 %res

IGC/VectorCompiler/test/LoadStoreLowering/fence.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define void @fence_rel() {
2828

2929
define void @fence_acq_rel() {
3030
; CHECK: call void @llvm.genx.fence(i8 33)
31-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 1)
31+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
3232
fence syncscope("workgroup") acq_rel
3333
ret void
3434
}

0 commit comments

Comments
 (0)