|
188 | 188 | ## * arg3: i8 Element size [MBC]
|
189 | 189 | ## * arg4: vNi8 Cache controls, where N is the number of supported cache levels [MBC]
|
190 | 190 | ## * arg5: i64 Address base (for stateless)
|
191 |
| -## i32 BTI (for stateful) |
| 191 | +## i32 Surface BTI (for stateful) |
192 | 192 | ## * arg6: vNxi32 or vNxi64 Address indices (overloaded)
|
193 | 193 | ## * arg7: i16 Address scale [MBC]
|
194 | 194 | ## * arg8: i32 Address immediate offset [MBC]
|
|
287 | 287 | ## i8 Channel mask (for quad intrinsics) [MBC]
|
288 | 288 | ## * arg4: vNi8 Cache controls, where N is the number of supported cache levels [MBC]
|
289 | 289 | ## * arg5: i64 Address base (for stateless)
|
290 |
| -## i32 BTI (for stateful) |
| 290 | +## i32 Surface BTI (for stateful) |
291 | 291 | ## * arg6: vNxi32 or vNxi64 Address indices (overloaded)
|
292 | 292 | ## * arg7: i16 Address scale [MBC]
|
293 | 293 | ## * arg8: i32 Address immediate offset [MBC]
|
|
443 | 443 | ## i8 Channel mask (for quad intrinsics) [MBC]
|
444 | 444 | ## * arg4: vNi8 Cache controls, where N is the number of supported cache levels [MBC]
|
445 | 445 | ## * arg5: i64 Address base (for stateless)
|
446 |
| -## i32 BTI (for stateful) |
| 446 | +## i32 Surface BTI (for stateful) |
447 | 447 | ## * arg6: vNxi32 or vNxi64 Address indices (overloaded)
|
448 | 448 | ## * arg7: i16 Address scale [MBC]
|
449 | 449 | ## * arg8: i32 Address immediate offset [MBC]
|
|
558 | 558 | ## i8 Channel mask (for quad intrinsics) [MBC]
|
559 | 559 | ## * arg4: vNi8 Cache controls, where N is the number of supported cache levels [MBC]
|
560 | 560 | ## * arg5: i64 Address base (for stateless)
|
561 |
| -## i32 BTI (for stateful) |
| 561 | +## i32 Surface BTI (for stateful) |
562 | 562 | ## * arg6: vNxi32 or vNxi64 Address indices (overloaded)
|
563 | 563 | ## * arg7: i16 Address scale [MBC]
|
564 | 564 | ## * arg8: i32 Address immediate offset [MBC]
|
|
836 | 836 | ],
|
837 | 837 | "attributes": "WriteMem", },
|
838 | 838 |
|
839 |
| -### ``llvm.vc.internal.lsc.*.2d.ugm.desc.*`` : 2d block load/store/prefetch instructions |
840 |
| -### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
841 |
| -### |
842 |
| -### * arg0: i1, Predicate |
843 |
| -### * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC] |
844 |
| -### * arg2: i8, Number of blocks [MBC] |
845 |
| -### * arg3: i8, Block width (in elements) [MBC] |
846 |
| -### * arg4: i8, Block height [MBC] |
847 |
| -### * arg5: v16i32 Matrix descriptor [MBC] |
848 |
| -### * arg6: i32, Memory block X immediate offset (in elements) [MBC] |
849 |
| -### * arg7: i32, Memory block Y immediate offset [MBC] |
850 |
| -### * arg8: value to passthru when predicate is false on load, |
851 |
| -### or value to write on store, |
852 |
| -### or dummy value for prefetch to deduce the matrix element type |
853 |
| -### |
854 |
| -### * Return value: the value read or void |
855 |
| -### |
856 |
| -### The matrix descriptor is a 16-element vector that describes the 2D block layout in memory. |
857 |
| -### The descriptor layout is as follows: |
858 |
| -### desc[0]: low 32 bits of the base address |
859 |
| -### desc[1]: high 32 bits of the base address |
860 |
| -### desc[2]: matrix width in bytes, minus 1 |
861 |
| -### desc[3]: matrix height, minus 1 |
862 |
| -### desc[4]: matrix pitch in bytes, minus 1 |
863 |
| -### desc[5]: block start X in elements, signed |
864 |
| -### desc[6]: block start Y in rows, signed |
865 |
| -### desc[7]: block size encoded as follows: |
866 |
| -### (block_width - 1) | ((block_height - 1) << 8) | ((number_of_blocks - 1) << 16) |
867 |
| -### desc[8-15]: reserved |
868 |
| -### |
| 839 | +## ``llvm.vc.internal.lsc.*.2d.ugm.desc.*`` : 2d block load/store/prefetch instructions |
| 840 | +## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 841 | +## |
| 842 | +## * arg0: i1, Predicate |
| 843 | +## * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC] |
| 844 | +## * arg2: i8, Number of blocks [MBC] |
| 845 | +## * arg3: i8, Block width (in elements) [MBC] |
| 846 | +## * arg4: i8, Block height [MBC] |
| 847 | +## * arg5: v16i32, Matrix descriptor [MBC] |
| 848 | +## * arg6: i32, Memory block X immediate offset (in elements) [MBC] |
| 849 | +## * arg7: i32, Memory block Y immediate offset [MBC] |
| 850 | +## * arg8: value to passthru when predicate is false on load, |
| 851 | +## or value to write on store, |
| 852 | +## or dummy value for prefetch to deduce the matrix element type |
| 853 | +## |
| 854 | +## * Return value: the value read or void |
| 855 | +## |
| 856 | +## The matrix descriptor is a 16-element vector that describes the 2D block layout in memory. |
| 857 | +## The descriptor layout is as follows: |
| 858 | +## desc[0]: low 32 bits of the base address |
| 859 | +## desc[1]: high 32 bits of the base address |
| 860 | +## desc[2]: matrix width in bytes, minus 1 |
| 861 | +## desc[3]: matrix height, minus 1 |
| 862 | +## desc[4]: matrix pitch in bytes, minus 1 |
| 863 | +## desc[5]: block start X in elements, signed |
| 864 | +## desc[6]: block start Y in rows, signed |
| 865 | +## desc[7]: block size encoded as follows: |
| 866 | +## (block_width - 1) | ((block_height - 1) << 8) | ((number_of_blocks - 1) << 16) |
| 867 | +## desc[8-15]: reserved |
| 868 | +## |
869 | 869 | "lsc_load_2d_ugm_desc" : { "result" : "anyvector",
|
870 | 870 | "arguments" : [
|
871 | 871 | "bool", # i1, predicate
|
|
947 | 947 | ],
|
948 | 948 | "attributes" : "WriteMem", },
|
949 | 949 |
|
950 |
| -## ``llvm.vc.internal.lsc.*2d.typed.bti.*`` : LSC typed 2d block intrinsics |
951 |
| -## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 950 | +## ``llvm.vc.internal.lsc.*2d.typed.bti.*`` : LSC typed 2d block BTI intrinsics |
| 951 | +## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
952 | 952 | ## * arg0: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
|
953 |
| -## * arg1: i32, Surface |
| 953 | +## * arg1: i32, Surface BTI |
954 | 954 | ## * arg2: i32, Block height [MBC]
|
955 | 955 | ## * arg3: i32, Block width (in elements) [MBC]
|
956 | 956 | ## * arg4: i32, Memory block X position (in bytes)
|
|
990 | 990 | "attributes" : "WriteMem", },
|
991 | 991 |
|
992 | 992 |
|
993 |
| -## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load intrinsic |
| 993 | +## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load BTI intrinsic |
994 | 994 | ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
995 |
| -## * arg0: vNi1 Predicate (overloaded) |
| 995 | +## * arg0: vNi1, Predicate (overloaded) |
996 | 996 | ## * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
|
997 |
| -## * arg2: i8 Channel mask [MBC] |
998 |
| -## * arg3: i32 BTI |
999 |
| -## * arg4: vNi32 U pixel indices (overloaded) |
1000 |
| -## * arg5: vNi32 V pixel indices |
1001 |
| -## * arg6: vNi32 R pixel indices |
1002 |
| -## * arg7: vNi32 LOD pixel indices |
| 997 | +## * arg2: i8, Channel mask [MBC] |
| 998 | +## * arg3: i32, Surface BTI |
| 999 | +## * arg4: vNi32, U pixel indices (overloaded) |
| 1000 | +## * arg5: vNi32, V pixel indices |
| 1001 | +## * arg6: vNi32, R pixel indices |
| 1002 | +## * arg7: vNi32, LOD pixel indices |
1003 | 1003 | ## * arg8: vector to take values for masked simd lanes from (load)
|
1004 | 1004 | ## vector to take values to write (store)
|
1005 | 1005 | ##
|
|
0 commit comments