@@ -1474,6 +1474,51 @@ declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32
1474
1474
1475
1475
declare <4 x i32 > @llvm.amdgcn.raw.tbuffer.load.v4i32 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
1476
1476
1477
+ ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
1478
+ ; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1479
+ ; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
1480
+ ; CHECK-NEXT: ret half %elt1
1481
+ define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1482
+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 ) ; full 4-element load; the CHECK lines for this test expect this call to remain <4 x half>
1483
+ %elt1 = extractelement <4 x half > %data , i32 3 ; only the last element (index 3) of the loaded vector is used
1484
+ ret half %elt1
1485
+ }
1486
+
1487
+ ; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
1488
+ ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
1489
+ ; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1490
+ ; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
1491
+ ; CHECK-NEXT: ret half %elt1
1492
+ define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1493
+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 ) ; the CHECK lines for this test currently expect the v4f16 call unchanged (see the FIXME about v3f16 before this test)
1494
+ %elt1 = extractelement <4 x half > %data , i32 2 ; only index 2 is used, so element 3 of the load is never read
1495
+ ret half %elt1
1496
+ }
1497
+
1498
+ ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
1499
+ ; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1500
+ ; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
1501
+ ; CHECK-NEXT: ret half %elt1
1502
+ define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1503
+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 ) ; the CHECK lines for this test expect this call narrowed to the <2 x half> (.v2f16) form with the same operands
1504
+ %elt1 = extractelement <4 x half > %data , i32 1 ; only index 1 is used, so elements 2 and 3 of the load are never read
1505
+ ret half %elt1
1506
+ }
1507
+
1508
+ ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
1509
+ ; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1510
+ ; CHECK-NEXT: ret half %data
1511
+ define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1512
+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 ) ; the CHECK lines for this test expect a scalar half load (.f16) with the same operands
1513
+ %elt1 = extractelement <4 x half > %data , i32 0 ; only index 0 is used; the expected output has no extractelement and returns %data directly
1514
+ ret half %elt1
1515
+ }
1516
+
1517
+ declare half @llvm.amdgcn.raw.tbuffer.load.f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1 ; scalar form expected by the extract_elt0 CHECK lines
1518
+ declare <2 x half > @llvm.amdgcn.raw.tbuffer.load.v2f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1 ; 2-element form expected by the extract_elt1 CHECK lines
1519
+ declare <3 x half > @llvm.amdgcn.raw.tbuffer.load.v3f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1 ; not called by the tests in this hunk; named only in the FIXME on extract_elt2
1520
+ declare <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1 ; form called by all four v4f16 extract tests in this hunk
1521
+
1477
1522
; --------------------------------------------------------------------
1478
1523
; llvm.amdgcn.struct.tbuffer.load
1479
1524
; --------------------------------------------------------------------
0 commit comments