@@ -322,6 +322,17 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
322
322
atomic_update<atomic_op::inc, int , VL>(ptr, offsets, pred);
323
323
}
324
324
325
+ // Try with int16_t to check that LSC atomic is generated
326
+ // The result is later cast to int16, not captured here.
327
+ // CHECK: call <8 x i32> @llvm.genx.lsc.xatomic.stateless.v8i32.v8i1.v8i64(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i64> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 0, <8 x i32> undef)
328
+ {
329
+ int16_t *ptr = 0 ;
330
+ constexpr int VL = 8 ;
331
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
332
+ auto atomic_res =
333
+ atomic_update<atomic_op::inc, int16_t , VL>(ptr, offsets);
334
+ }
335
+
325
336
// Accessor
326
337
327
338
// CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
@@ -377,6 +388,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
377
388
auto atomic_res_acc =
378
389
atomic_update<atomic_op::inc, int , VL>(acc, offsets, pred);
379
390
}
391
+ // Try with int16_t to check that LSC atomic is generated
392
+ // The result is later cast to int16, not captured here.
393
+ // CHECK-STATEFUL: call <8 x i32> @llvm.genx.lsc.xatomic.bti.v8i32.v8i1.v8i32(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i32> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 {{[^)]+}}, <8 x i32> undef)
394
+ // CHECK-STATELESS: call <8 x i32> @llvm.genx.lsc.xatomic.stateless.v8i32.v8i1.v8i64(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i64> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 0, <8 x i32> undef)
395
+ {
396
+ using AccType =
397
+ sycl::accessor<int16_t , 1 , sycl::access::mode::read_write>;
398
+ AccType *acc = nullptr ;
399
+ constexpr int VL = 8 ;
400
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
401
+ auto atomic_res =
402
+ atomic_update<atomic_op::inc, int16_t , VL>(*acc, offsets);
403
+ }
380
404
}
381
405
382
406
// Test atomic update with one operand.
@@ -432,6 +456,18 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
432
456
auto res_atomic_8 =
433
457
atomic_update<atomic_op::add, int >(ptr, offsets, add, pred);
434
458
459
+ // Try with int16_t to check that LSC atomic is generated
460
+ // The result is later cast to int16, not captured here.
461
+ // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32>{{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
462
+ {
463
+ int16_t *ptr = 0 ;
464
+ constexpr int VL = 4 ;
465
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
466
+ auto add = simd<int16_t , VL>(5 );
467
+ auto atomic_res =
468
+ atomic_update<atomic_op::add, int16_t , VL>(ptr, offsets, add);
469
+ }
470
+
435
471
// Accessors
436
472
437
473
// CHECK-STATEFUL-COUNT-14: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
@@ -483,6 +519,21 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
483
519
// CHECK-STATELESS: call <4 x i32> @llvm.genx.svm.atomic.sub.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef)
484
520
auto res_atomic_17 =
485
521
atomic_update<atomic_op::sub, int >(acc, offsets, add, pred);
522
+
523
+ // Try with int16_t to check that LSC atomic is generated
524
+ // The result is later cast to int16, not captured here.
525
+ // CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
526
+ // CHECK-STATELESS: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
527
+ {
528
+ using AccType =
529
+ sycl::accessor<int16_t , 1 , sycl::access::mode::read_write>;
530
+ AccType *acc = nullptr ;
531
+ constexpr int VL = 4 ;
532
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
533
+ auto add = simd<int16_t , VL>(5 );
534
+ auto atomic_res =
535
+ atomic_update<atomic_op::add, int16_t , VL>(*acc, offsets, add);
536
+ }
486
537
}
487
538
488
539
// Test atomic update with two operands.
@@ -626,6 +677,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
626
677
auto res_atomic_100 = atomic_update<atomic_op::cmpxchg, int , VL>(
627
678
ptr, offsets, swap, compare, pred);
628
679
680
+ // Try with int16_t to check that LSC atomic is generated
681
+ // The result is later cast to int16, not captured here.
682
+ // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
683
+ {
684
+ int16_t *ptr = 0 ;
685
+ constexpr int VL = 4 ;
686
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
687
+ simd<int16_t , VL> swap = simd<int16_t , VL>(1 ) * sizeof (int );
688
+ auto compare = swap * 2 ;
689
+ auto atomic_res = atomic_update<atomic_op::cmpxchg, int16_t , VL>(
690
+ ptr, offsets, swap, compare);
691
+ }
692
+
629
693
// Accessors
630
694
631
695
// CHECK-STATEFUL-COUNT-30: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 {{[^)]+}}, <4 x i32> undef)
@@ -751,6 +815,22 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
751
815
// CHECK-STATELESS: call <4 x i32> @llvm.genx.svm.atomic.cmpxchg.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef)
752
816
auto res_atomic_33 = atomic_update<atomic_op::cmpxchg, int , VL>(
753
817
acc, offsets, swap, compare, pred);
818
+
819
+ // Try with int16_t to check that LSC atomic is generated
820
+ // The result is later cast to int16, not captured here.
821
+ // CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 {{[^)]+}}, <4 x i32> undef)
822
+ // CHECK-STATELESS: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
823
+ {
824
+ using AccType =
825
+ sycl::accessor<int16_t , 1 , sycl::access::mode::read_write>;
826
+ AccType *acc = nullptr ;
827
+ constexpr int VL = 4 ;
828
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
829
+ simd<int16_t , VL> swap = simd<int16_t , VL>(1 ) * sizeof (int );
830
+ auto compare = swap * 2 ;
831
+ auto atomic_res = atomic_update<atomic_op::cmpxchg, int16_t , VL>(
832
+ *acc, offsets, compare, swap);
833
+ }
754
834
}
755
835
756
836
// Test slm_atomic_update without operands.
@@ -824,12 +904,11 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
824
904
{
825
905
constexpr int VL = 16 ;
826
906
simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
827
- auto pred = simd_mask<VL>(1 );
828
907
simd<int16_t , VL> add = simd<int16_t , VL>(1 ) * sizeof (int );
829
908
830
909
// CHECK: call <16 x i32> @llvm.genx.lsc.xatomic.slm.v16i32.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, <16 x i32> undef, i32 0, <16 x i32> undef)
831
910
auto res_slm_atomic_0 =
832
- slm_atomic_update<atomic_op::add, int16_t >(offsets, add, pred );
911
+ slm_atomic_update<atomic_op::add, int16_t >(offsets, add);
833
912
}
834
913
// Expect DWORD for fmin.
835
914
{
@@ -934,6 +1013,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
934
1013
offsets_view.select <VL, 1 >(), swap_view.select <VL, 1 >(),
935
1014
compare_view.select <VL, 1 >());
936
1015
1016
+ // Expect LSC for short.
1017
+ {
1018
+ constexpr int VL = 16 ;
1019
+ simd<uint32_t , VL> offsets = simd<uint32_t , VL>(1 ) * sizeof (int16_t );
1020
+ auto compare = simd<int16_t , VL>(VL, 1 );
1021
+ auto swap = compare * 2 ;
1022
+
1023
+ // CHECK: call <16 x i32> @llvm.genx.lsc.xatomic.slm.v16i32.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, i32 0, <16 x i32> undef)
1024
+ auto res_slm_atomic_0 =
1025
+ slm_atomic_update<atomic_op::cmpxchg, int16_t , VL>(offsets, swap,
1026
+ compare);
1027
+ }
1028
+
937
1029
// Expect LSC for int64_t.
938
1030
{
939
1031
constexpr int VL = 16 ;
@@ -964,6 +1056,15 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
964
1056
local_acc, offsets_view.select <VL, 1 >(), pred);
965
1057
auto res_slm_atomic_6 = atomic_update<atomic_op::inc, int , VL>(
966
1058
local_acc, offsets_view.select <VL, 1 >());
1059
+
1060
+ // Expect LSC for short.
1061
+ {
1062
+ using LocalAccType = sycl::local_accessor<int16_t , 1 >;
1063
+ LocalAccType *local_acc = nullptr ;
1064
+ // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef)
1065
+ auto res_slm_atomic_1 =
1066
+ atomic_update<atomic_op::inc, int16_t >(*local_acc, offsets);
1067
+ }
967
1068
}
968
1069
// One operand atomic.
969
1070
{
@@ -997,6 +1098,16 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
997
1098
pred);
998
1099
res_slm_atomic_8 = atomic_update<atomic_op::add, int , VL>(
999
1100
local_acc, offsets_view.select <VL, 1 >(), add_view.select <VL, 1 >());
1101
+
1102
+ // Expect LSC for short.
1103
+ {
1104
+ using LocalAccType = sycl::local_accessor<int16_t , 1 >;
1105
+ LocalAccType *local_acc = nullptr ;
1106
+ simd<int16_t , VL> add = simd<int16_t , VL>(1 ) * sizeof (int );
1107
+ // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
1108
+ auto res_slm_atomic_1 =
1109
+ atomic_update<atomic_op::add, int16_t >(*local_acc, offsets, add);
1110
+ }
1000
1111
}
1001
1112
// Two operand atomic.
1002
1113
{
@@ -1069,6 +1180,17 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
1069
1180
res_slm_atomic_16 = atomic_update<atomic_op::cmpxchg, int , VL>(
1070
1181
local_acc, offsets_view.select <VL, 1 >(), swap_view.select <VL, 1 >(),
1071
1182
compare_view.select <VL, 1 >());
1183
+
1184
+ // Expect LSC for short.
1185
+ {
1186
+ using LocalAccType = sycl::local_accessor<int16_t , 1 >;
1187
+ LocalAccType *local_acc = nullptr ;
1188
+ auto compare = simd<int16_t , VL>(VL, 1 );
1189
+ auto swap = compare * 2 ;
1190
+ // CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
1191
+ auto res_slm_atomic_1 = atomic_update<atomic_op::cmpxchg, int16_t , VL>(
1192
+ *local_acc, offsets, swap, compare);
1193
+ }
1072
1194
}
1073
1195
}
1074
1196
0 commit comments